First, we do some setup


In [1]:
import emission.analysis.point_features as pf
import emission.storage.decorations.location_queries as lq

In [2]:
reload(lq)


Out[2]:
<module 'emission.storage.decorations.location_queries' from '/Users/shankari/e-mission/e-mission-server/emission/storage/decorations/location_queries.pyc'>

In [3]:
# Lower the root logger threshold to DEBUG so that debug records are shown.
import logging
logger = logging.getLogger()  # no name given, so this is the root logger
logger.setLevel(logging.DEBUG)
# Smoke test. The module-level logging.debug() also installs a default handler
# (via basicConfig) when the root logger has none.
logging.debug("test")


DEBUG:root:test

In [4]:
%matplotlib inline

Then, we display the maps for one section as a "unit test"

Note that we need to have separate unit tests for the analysis functions that will be run as part of the ongoing pipeline. This notebook only tests the part that puts them all together and displays the map.


In [5]:
# Fetch one known section to use as the "unit test" input, then display it.
ts = lq.get_section("20150120T083030-0800_0")
ts


Out[5]:
AttrDict({u'start_time': u'20150120T083030-0800', u'end_ts': 1421773972.0, u'start_ts': 1421771430.0, u'filter': u'time', u'source': u'raw_auto', u'end_time': u'20150120T091252-0800', u'_id': ObjectId('55c84b74f6858f75580c2dca'), u'id': u'20150120T083030-0800_0', u'prev_section': u'20150120T081412-0800_0'})

In [6]:
import emission.analysis.plotting.leaflet_osm.our_plotter as lo

In [7]:
reload(lo)


Out[7]:
<module 'emission.analysis.plotting.leaflet_osm.our_plotter' from '/Users/shankari/e-mission/e-mission-server/emission/analysis/plotting/leaflet_osm/our_plotter.pyc'>

In [8]:
import emission.analysis.classification.cleaning.speed_outlier_detection as cso
import emission.analysis.classification.cleaning.jump_smoothing as cjs

In [389]:
reload(cso)
reload(cjs)


Out[389]:
<module 'emission.analysis.classification.cleaning.jump_smoothing' from '/Users/shankari/e-mission/e-mission-server/emission/analysis/classification/cleaning/jump_smoothing.py'>

In [10]:
import emission.analysis.plotting.leaflet_osm.ipython_helper as ipy

Order of results


In [11]:
import itertools
# List every (outlier detector, smoothing algorithm) pairing in the order that
# itertools.product yields them: the first list varies slowest.
for (o, a) in itertools.product(["BoxplotOutlier", "SimpleQuartileOutlier"],
                            ["SmoothPiecewiseRansac", "SmoothBoundary", "SmoothPosdap"]):
    # %-formatting inside print(...) emits the same "<o> <a>" text whether
    # print is a statement or a function.
    print("%s %s" % (o, a))


BoxplotOutlier SmoothPiecewiseRansac
BoxplotOutlier SmoothBoundary
BoxplotOutlier SmoothPosdap
SimpleQuartileOutlier SmoothPiecewiseRansac
SimpleQuartileOutlier SmoothBoundary
SimpleQuartileOutlier SmoothPosdap

In [12]:
import json

In [288]:
# Load the smoothing ground truth (a JSON object; its keys are used below to
# derive section ids). The `with` block closes the file handle deterministically.
# NOTE(review): absolute, user-specific path - consider a configurable data dir.
with open("/Users/shankari/cluster_ground_truth/smoothing/smoothing_removed_points_combined") as ground_truth_fp:
    section_ground_truth_list = json.load(ground_truth_fp)

In [289]:
def extract_raw_section_id(uuid_key):
    """Return `uuid_key` with its first '_'-delimited token removed.

    Everything after the first underscore is kept verbatim (including any
    further underscores). A key without an underscore yields ''.
    Presumably the dropped token is a user uuid prefix - TODO confirm against
    the ground truth file.
    """
    return '_'.join(uuid_key.split('_')[1:])

In [290]:
# One raw section id per ground-truth entry (iterating a dict yields its keys).
section_id_list = [extract_raw_section_id(key) for key in section_ground_truth_list]

In [291]:
# Look each section up exactly once and keep only the ones that were found.
# The inner generator performs the lookup; the outer comprehension filters out
# the None results returned for missing sections.
section_list = [section for section in (lq.get_section(sid) for sid in section_id_list)
                if section is not None]


WARNING:root:Did not find match for section 20150227T193156-0800_2, returning None
WARNING:root:Did not find match for section 20150304T143900-0800_1, returning None
WARNING:root:Did not find match for section 20150217T163802-0800_1, returning None
WARNING:root:Did not find match for section 20150218T180624-0800_1, returning None
WARNING:root:Did not find match for section 20150119T075756-0800_0, returning None
WARNING:root:Did not find match for section 20150407T080139-0700_1, returning None
WARNING:root:Did not find match for section 20150331T084219-0700_4, returning None

In [292]:
import emission.analysis.classification.cleaning.location_smoothing as ls

In [470]:
reload(ls)


Out[470]:
<module 'emission.analysis.classification.cleaning.location_smoothing' from '/Users/shankari/e-mission/e-mission-server/emission/analysis/classification/cleaning/location_smoothing.py'>

In [20]:
def two_rows(map_list):
    """Arrange the first eight entries of `map_list` as a 2 x 4 grid.

    Row one is items 0, 2, 3, 4 and row two is items 1, 5, 6, 7.
    Raises IndexError when fewer than eight items are supplied.
    """
    top_row = [map_list[pos] for pos in (0, 2, 3, 4)]
    bottom_row = [map_list[pos] for pos in (1, 5, 6, 7)]
    return [top_row, bottom_row]

In [21]:
import emission.analysis.plotting.leaflet_osm.ipython_helper as ipy

In [22]:
import numpy as np

In [294]:
import copy as copy

In [331]:
import pandas as pd

Confirm that the accuracy filter doesn't work sometimes

Section 18 ending at Mountain View

Train section, needs work


In [295]:
# Carve the train-only portion out of section 18: copy the section record and
# pull its end timestamp forward to 1427903481.0, one second before the
# bike-only copy further down starts (1427903482).
# NOTE(review): copy.copy is shallow; this assumes end_ts is a top-level field
# so that section_list[18] itself is left untouched - confirm.
section_train_only = copy.copy(section_list[18])
section_train_only.end_ts = 1427903481.0

In [246]:
section_train_only_df = ls.get_section_points(section_train_only)


final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1427901675.0}}, {'data.mTime': {'$lt': 1427903481.0}}], 'user_id': u'20150401T072115-0700_0', 'metadata.filter': u'time'} 

In [248]:
# Remove the points rejected by the accuracy filter, then add the derived
# columns (distance/speed, heading_change, heading).
# NOTE(review): heading_change is added before heading here, while the
# recomputation further down calls add_heading first and add_heading_change
# second - confirm that the order does not matter.
with_features_train_only_df = ls.add_heading(ls.add_heading_change(ls.add_speed(ls.filter_accuracy(section_train_only_df))))


filtering points Int64Index([2, 4, 7, 10, 14, 21, 22, 23, 26, 38, 48, 49], dtype='int64')
filtered list size went from (51, 8) to (39, 8)

In [249]:
section_train_only_iqr_threshold = cso.BoxplotOutlier(ignore_zeros=True).get_threshold(with_features_train_only_df)


DEBUG:root:quartile values are 0.25    10078.712130
0.75    93479.100745
Name: speed, dtype: float64
DEBUG:root:iqr 83400.3886148

In [251]:
section_train_only_iqr_threshold


Out[251]:
343680.26658981602

In [252]:
with_features_train_only_df[with_features_train_only_df.speed > section_train_only_iqr_threshold]


Out[252]:
filter formatted_time idx key mAccuracy mLatitude mLongitude mTime distance speed heading_change heading
1 time 1970-01-17 04:38:21.743000 2 background/location 36.000000 37.587769 -122.362285 1427901743 23266.370435 646288.067650 0.000000 156.096104
4 time 1970-01-17 04:38:21.919000 7 background/location 27.000000 37.571086 -122.326752 1427901919 26313.381291 1012053.126559 177.772265 151.485890
5 time 1970-01-17 04:38:21.979000 9 background/location 22.000000 37.779111 -122.469279 1427901979 26313.509634 438558.493894 -179.912542 -28.426652
7 time 1970-01-17 04:38:22.070000 12 background/location 91.500000 37.549345 -122.307391 1427902070 29253.881020 513225.982799 319.622432 150.799785
18 time 1970-01-17 04:38:22.610000 28 background/location 29.000000 37.779108 -122.469279 1427902610 39700.054778 684483.703065 -171.159811 -32.922451
20 time 1970-01-17 04:38:22.731000 30 background/location 73.831001 37.455289 -122.183154 1427902731 43950.158899 482968.779112 144.924322 144.924322
23 time 1970-01-17 04:38:22.820000 33 background/location 25.000000 37.779109 -122.469279 1427902820 44025.250452 1467508.348411 -34.923255 -34.923255
29 time 1970-01-17 04:38:23.028000 40 background/location 39.000000 37.442819 -122.165030 1427903028 46005.696406 793201.662169 122.714635 144.278034

In [254]:
# Positions of the rows whose longitude is within 0.01 of -122.46.
# Presumably this is the spot the zig-zag points keep jumping back to - confirm on the map.
np.nonzero(abs(with_features_train_only_df.mLongitude + 122.46) < 0.01)


Out[254]:
(array([ 0,  3,  5,  6, 18, 19, 23, 24, 25, 26, 27, 28]),)

In [255]:
np.nonzero(abs(with_features_train_only_df.mLongitude + 121.95) < 0.01)


Out[255]:
(array([], dtype=int64),)

In [256]:
# Keep only the rows whose longitude is NOT within 0.01 of -122.46.
near_minus_122_46 = abs(with_features_train_only_df.mLongitude + 122.46) < 0.01
cleaned_train_only_df = with_features_train_only_df[~near_minus_122_46]

In [258]:
# The derived columns were computed before the rows were removed, so drop them
# all in one call and recompute them on the cleaned points.
stale_feature_columns = ['speed', 'distance', 'heading', 'heading_change']
recomputed_cleaned_train_only_df = ls.add_heading_change(ls.add_heading(ls.add_speed(
   cleaned_train_only_df.drop(stale_feature_columns, axis=1))))

In [268]:
with_features_train_only_df.heading_change.plot(kind="bar", figsize = (20,6))


Out[268]:
<matplotlib.axes.AxesSubplot at 0x11163bd50>

In [269]:
recomputed_cleaned_train_only_df.heading_change.plot(kind="bar", figsize=(20,6))


Out[269]:
<matplotlib.axes.AxesSubplot at 0x1109c2c50>

In [281]:
ipy.inline_map(lo.get_map(cleaned_train_only_df))


Out[281]:

In [322]:
section_train_only_segmentation_points = with_features_train_only_df[with_features_train_only_df.speed > section_train_only_iqr_threshold].index

In [323]:
# Bracket the outlier positions with the first row (0) and the last row of the
# frame so that consecutive pairs cover the whole section.
# The append position is taken from this index itself, not from a variable
# left over from another cell.
section_train_only_segmentation_points = section_train_only_segmentation_points.insert(0, 0)
section_train_only_segmentation_points = section_train_only_segmentation_points.insert(
    len(section_train_only_segmentation_points), with_features_train_only_df.shape[0] - 1)

In [324]:
print section_train_only_segmentation_points


Int64Index([0, 1, 4, 5, 7, 18, 20, 23, 29, 38], dtype='int64')

In [325]:
# Pair each segmentation point with its successor to get (start, end) segments.
# Materialised as a list because it is iterated more than once and indexed below.
segments = list(zip(section_train_only_segmentation_points, section_train_only_segmentation_points[1:]))

In [332]:
segments_df = pd.DataFrame(segments, columns=["start", "end"])

In [359]:
# Find the longest subsequence (the part with no zigzags). If any segment is good, this must be it.
# But what is the definition of "longest"?
# It can't simply be the number of points - note that 23-29 has a bunch of points and very little distance.
# Using distance is also problematic - 1-4 has a huge distance because it contains an undetected zig (part of a zigzag).
# How about the largest number of points that are more than 100 m apart? That should work.

In [351]:
# Report the size of every candidate segment.
# The slice excludes the `end` row, so the distance is measured between the
# first and last rows actually in the slice (a one-row segment reports 0.0).
for (start, end) in segments:
    currSegment = with_features_train_only_df[start:end]
    # Compute the end-to-end distance once and reuse it in the message.
    currDistance = pf.calDistance(currSegment.iloc[0], currSegment.iloc[-1])
    print ("From %s to %s, number of points is %s, distance is %s" % (start, end, (end-start), currDistance))


From 0 to 1, number of points is 1, distance is 0.0
From 1 to 4, number of points is 3, distance is 23266.3577442
From 4 to 5, number of points is 1, distance is 0.0
From 5 to 7, number of points is 2, distance is 0.181351790572
From 7 to 18, number of points is 11, distance is 10697.0211474
From 18 to 20, number of points is 2, distance is 0.0
From 20 to 23, number of points is 3, distance is 77.0287528643
From 23 to 29, number of points is 6, distance is 0.0478251850624
From 29 to 38, number of points is 9, distance is 8402.75466767

In [353]:
# Hand-picked from the listing above: position 4 is the (7, 18) segment.
# NOTE(review): the position is hard-coded, so it goes stale silently if the
# segmentation changes.
good_segments = [segments[4]]; good_segments


Out[353]:
[(7, 18)]

In [354]:
with_features_train_only_df[1:4]


Out[354]:
filter formatted_time idx key mAccuracy mLatitude mLongitude mTime distance speed heading_change heading
1 time 1970-01-17 04:38:21.743000 2 background/location 36.0 37.587769 -122.362285 1427901743 23266.370435 646288.067650 0.000000 156.096104
2 time 1970-01-17 04:38:21.799000 4 background/location 49.5 37.580165 -122.344912 1427901799 1748.776506 31228.151893 -37.189927 118.906178
3 time 1970-01-17 04:38:21.893000 6 background/location 27.0 37.779109 -122.469280 1427901893 24681.006134 262563.895046 -145.192553 -26.286376

In [357]:
ls.add_speed(pd.DataFrame(with_features_train_only_df[1:4].to_dict('records')).drop('speed', axis=1).drop('distance', axis=1))


Out[357]:
filter formatted_time heading heading_change idx key mAccuracy mLatitude mLongitude mTime distance speed
0 time 1970-01-17 04:38:21.743000 156.096104 0.000000 2 background/location 36.0 37.587769 -122.362285 1427901743 0.000000 0.000000
1 time 1970-01-17 04:38:21.799000 118.906178 -37.189927 4 background/location 49.5 37.580165 -122.344912 1427901799 1748.776506 31228.151893
2 time 1970-01-17 04:38:21.893000 -26.286376 -145.192553 6 background/location 27.0 37.779109 -122.469280 1427901893 24681.006134 262563.895046

Bike section, works


In [296]:
# Bike-only portion of section 18: copy the section record and move its start
# timestamp to 1427903482, one second after the train-only copy ends (1427903481.0).
# NOTE(review): copy.copy is shallow; this assumes start_ts is a top-level field
# so that section_list[18] itself is left untouched - confirm.
section_bike_only = copy.copy(section_list[18])
section_bike_only.start_ts = 1427903482

In [297]:
section_bike_only_df = ls.get_section_points(section_bike_only)


final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1427903482}}, {'data.mTime': {'$lt': 1427904290.0}}], 'user_id': u'20150401T072115-0700_0', 'metadata.filter': u'time'} 

In [298]:
# Remove the points rejected by the accuracy filter, then add the derived
# columns (distance/speed, heading_change, heading).
# NOTE(review): heading_change is added before heading, unlike the
# add_heading -> add_heading_change order used elsewhere in this notebook - confirm
# that the order does not matter.
with_features_bike_only_df = ls.add_heading(ls.add_heading_change(ls.add_speed(ls.filter_accuracy(section_bike_only_df))))


filtering points Int64Index([0, 2], dtype='int64')
filtered list size went from (26, 8) to (24, 8)

In [300]:
tml = lo.evaluate_filtering([section_bike_only], [cso.BoxplotOutlier(ignore_zeros=True)],
                            [cjs.SmoothPiecewiseRansac(), cjs.SmoothBoundary()])


final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1427903482}}, {'data.mTime': {'$lt': 1427904290.0}}], 'user_id': u'20150401T072115-0700_0', 'metadata.filter': u'time'} 
filtering points Int64Index([0, 2], dtype='int64')
DEBUG:root:quartile values are 0.25     135.766851
0.75    2869.587640
Name: speed, dtype: float64
DEBUG:root:iqr 2733.82078886
DEBUG:root:Found 1 potential outliers, list = [1]
DEBUG:root:Only one candidate, cluster centers are [array([1])]
DEBUG:root:Considering candidate cluster center [1]
DEBUG:root:lowRange = max(-4, 0) = 0 and highRange = max(6, 24) = 6
DEBUG:root:Area size = 7, index = Int64Index([0, 1, 2, 3, 4, 5, 6], dtype='int64') with size 7
DEBUG:root:In area 0 - 6, deleted 1 points through ransac filtering
DEBUG:root:Retain mask is of size 7
DEBUG:root:with speed df shape is (24, 10), ransac_mask size = 24
DEBUG:root:filtering done, ransac deleted points = [0]
filtered list size went from (26, 8) to (24, 8)
filtering points Int64Index([0, 2], dtype='int64')
filtered list size went from (26, 8) to (24, 8)
filtering points Int64Index([0, 2], dtype='int64')
DEBUG:root:quartile values are 0.25     135.766851
0.75    2869.587640
Name: speed, dtype: float64
DEBUG:root:iqr 2733.82078886
DEBUG:root:while considering point AttrDict({'speed': 182464.72183862209, u'mLongitude': -122.077896, u'mLatitude': 37.394912400000003, u'mTime': 1427903601.0})(1), prev_pt (AttrDict({'speed': 0.0, u'mLongitude': -121.9504707, u'mLatitude': 37.405087100000003, u'mTime': 1427903539.0})) speed = 182464.721839
DEBUG:root:currSpeed > 11071.0500067, removing index 1 
DEBUG:root:while considering point AttrDict({'speed': 2269.4991250420549, u'mLongitude': -122.0785217, u'mLatitude': 37.394554900000003, u'mTime': 1427903631.0})(2), prev_pt (AttrDict({'speed': 0.0, u'mLongitude': -121.9504707, u'mLatitude': 37.405087100000003, u'mTime': 1427903539.0})) speed = 123607.174588
DEBUG:root:currSpeed > 11071.0500067, removing index 2 
DEBUG:root:while considering point AttrDict({'speed': 2642.5190727328054, u'mLongitude': -122.0793295, u'mLatitude': 37.394244399999998, u'mTime': 1427903661.0})(3), prev_pt (AttrDict({'speed': 0.0, u'mLongitude': -121.9504707, u'mLatitude': 37.405087100000003, u'mTime': 1427903539.0})) speed = 93823.34956
DEBUG:root:currSpeed > 11071.0500067, removing index 3 
DEBUG:root:while considering point AttrDict({'speed': 2871.0143623852427, u'mLongitude': -122.0802595, u'mLatitude': 37.394123, u'mTime': 1427903690.0})(4), prev_pt (AttrDict({'speed': 0.0, u'mLongitude': -121.9504707, u'mLatitude': 37.405087100000003, u'mTime': 1427903539.0})) speed = 76354.8047663
DEBUG:root:currSpeed > 11071.0500067, removing index 4 
DEBUG:root:while considering point AttrDict({'speed': 4649.7997229866551, u'mLongitude': -122.08100330000001, u'mLatitude': 37.3930164, u'mTime': 1427903720.0})(5), prev_pt (AttrDict({'speed': 0.0, u'mLongitude': -121.9504707, u'mLatitude': 37.405087100000003, u'mTime': 1427903539.0})) speed = 64135.8040302
DEBUG:root:currSpeed > 11071.0500067, removing index 5 
DEBUG:root:while considering point AttrDict({'speed': 4907.7811985571607, u'mLongitude': -122.0819102, u'mLatitude': 37.391958500000001, u'mTime': 1427903749.0})(6), prev_pt (AttrDict({'speed': 0.0, u'mLongitude': -121.9504707, u'mLatitude': 37.405087100000003, u'mTime': 1427903539.0})) speed = 55725.3898495
DEBUG:root:currSpeed > 11071.0500067, removing index 6 
DEBUG:root:while considering point AttrDict({'speed': 508.78624228609834, u'mLongitude': -122.08207059999999, u'mLatitude': 37.391886399999997, u'mTime': 1427903781.0})(7), prev_pt (AttrDict({'speed': 0.0, u'mLongitude': -121.9504707, u'mLatitude': 37.405087100000003, u'mTime': 1427903539.0})) speed = 48418.9986563
DEBUG:root:currSpeed > 11071.0500067, removing index 7 
DEBUG:root:while considering point AttrDict({'speed': 347.34871264881514, u'mLongitude': -122.0821885, u'mLatitude': 37.391889200000001, u'mTime': 1427903811.0})(8), prev_pt (AttrDict({'speed': 0.0, u'mLongitude': -121.9504707, u'mLatitude': 37.405087100000003, u'mTime': 1427903539.0})) speed = 43116.5125528
DEBUG:root:currSpeed > 11071.0500067, removing index 8 
DEBUG:root:while considering point AttrDict({'speed': 1018.0706715498312, u'mLongitude': -122.08199329999999, u'mLatitude': 37.3921159, u'mTime': 1427903841.0})(9), prev_pt (AttrDict({'speed': 0.0, u'mLongitude': -121.9504707, u'mLatitude': 37.405087100000003, u'mTime': 1427903539.0})) speed = 38766.3381715
DEBUG:root:currSpeed > 11071.0500067, removing index 9 
DEBUG:root:while considering point AttrDict({'speed': 146.62161075240834, u'mLongitude': -122.0819801, u'mLatitude': 37.392080499999999, u'mTime': 1427903869.0})(10), prev_pt (AttrDict({'speed': 0.0, u'mLongitude': -121.9504707, u'mLatitude': 37.405087100000003, u'mTime': 1427903539.0})) speed = 35475.0464716
DEBUG:root:currSpeed > 11071.0500067, removing index 10 
DEBUG:root:while considering point AttrDict({'speed': 2679.5341214744944, u'mLongitude': -122.08290700000001, u'mLatitude': 37.391780400000002, u'mTime': 1427903902.0})(11), prev_pt (AttrDict({'speed': 0.0, u'mLongitude': -121.9504707, u'mLatitude': 37.405087100000003, u'mTime': 1427903539.0})) speed = 32485.3592326
DEBUG:root:currSpeed > 11071.0500067, removing index 11 
DEBUG:root:while considering point AttrDict({'speed': 2868.1609178748317, u'mLongitude': -122.0832642, u'mLatitude': 37.391060500000002, u'mTime': 1427903932.0})(12), prev_pt (AttrDict({'speed': 0.0, u'mLongitude': -121.9504707, u'mLatitude': 37.405087100000003, u'mTime': 1427903539.0})) speed = 30111.528174
DEBUG:root:currSpeed > 11071.0500067, removing index 12 
DEBUG:root:while considering point AttrDict({'speed': 3312.4368387421955, u'mLongitude': -122.0843179, u'mLatitude': 37.391273699999999, u'mTime': 1427903961.0})(13), prev_pt (AttrDict({'speed': 0.0, u'mLongitude': -121.9504707, u'mLatitude': 37.405087100000003, u'mTime': 1427903539.0})) speed = 28253.582592
DEBUG:root:currSpeed > 11071.0500067, removing index 13 
DEBUG:root:while considering point AttrDict({'speed': 4574.18106268676, u'mLongitude': -122.0856308, u'mLatitude': 37.391762200000002, u'mTime': 1427903989.0})(14), prev_pt (AttrDict({'speed': 0.0, u'mLongitude': -121.9504707, u'mLatitude': 37.405087100000003, u'mTime': 1427903539.0})) speed = 26735.9636518
DEBUG:root:currSpeed > 11071.0500067, removing index 14 
DEBUG:root:while considering point AttrDict({'speed': 1711.6779023059614, u'mLongitude': -122.0860721, u'mLatitude': 37.391416200000002, u'mTime': 1427904021.0})(15), prev_pt (AttrDict({'speed': 0.0, u'mLongitude': -121.9504707, u'mLatitude': 37.405087100000003, u'mTime': 1427903539.0})) speed = 25051.2064861
DEBUG:root:currSpeed > 11071.0500067, removing index 15 
DEBUG:root:while considering point AttrDict({'speed': 2108.0634398571451, u'mLongitude': -122.0863047, u'mLatitude': 37.390898399999998, u'mTime': 1427904050.0})(16), prev_pt (AttrDict({'speed': 0.0, u'mLongitude': -121.9504707, u'mLatitude': 37.405087100000003, u'mTime': 1427903539.0})) speed = 23683.9094886
DEBUG:root:currSpeed > 11071.0500067, removing index 16 
DEBUG:root:while considering point AttrDict({'speed': 395.00621395142298, u'mLongitude': -122.086226, u'mLatitude': 37.390984699999997, u'mTime': 1427904080.0})(17), prev_pt (AttrDict({'speed': 0.0, u'mLongitude': -121.9504707, u'mLatitude': 37.405087100000003, u'mTime': 1427903539.0})) speed = 22355.5086475
DEBUG:root:currSpeed > 11071.0500067, removing index 17 
DEBUG:root:while considering point AttrDict({'speed': 50.792677233945582, u'mLongitude': -122.08622560000001, u'mLatitude': 37.390998400000001, u'mTime': 1427904110.0})(18), prev_pt (AttrDict({'speed': 0.0, u'mLongitude': -121.9504707, u'mLatitude': 37.405087100000003, u'mTime': 1427903539.0})) speed = 21180.5545237
DEBUG:root:currSpeed > 11071.0500067, removing index 18 
DEBUG:root:while considering point AttrDict({'speed': 8.7061269363542912, u'mLongitude': -122.0862262, u'mLatitude': 37.391000699999999, u'mTime': 1427904140.0})(19), prev_pt (AttrDict({'speed': 0.0, u'mLongitude': -121.9504707, u'mLatitude': 37.405087100000003, u'mTime': 1427903539.0})) speed = 20123.3209337
DEBUG:root:currSpeed > 11071.0500067, removing index 19 
DEBUG:root:while considering point AttrDict({'speed': 59.686235389153424, u'mLongitude': -122.08622579999999, u'mLatitude': 37.390984600000003, u'mTime': 1427904170.0})(20), prev_pt (AttrDict({'speed': 0.0, u'mLongitude': -121.9504707, u'mLatitude': 37.405087100000003, u'mTime': 1427903539.0})) speed = 19166.9003323
DEBUG:root:currSpeed > 11071.0500067, removing index 20 
DEBUG:root:while considering point AttrDict({'speed': 18.171385360576551, u'mLongitude': -122.08622560000001, u'mLatitude': 37.390979700000003, u'mTime': 1427904200.0})(21), prev_pt (AttrDict({'speed': 0.0, u'mLongitude': -121.9504707, u'mLatitude': 37.405087100000003, u'mTime': 1427903539.0})) speed = 18297.0766068
DEBUG:root:currSpeed > 11071.0500067, removing index 21 
DEBUG:root:while considering point AttrDict({'speed': 124.91209178840978, u'mLongitude': -122.0862259, u'mLatitude': 37.391013399999999, u'mTime': 1427904230.0})(22), prev_pt (AttrDict({'speed': 0.0, u'mLongitude': -121.9504707, u'mLatitude': 37.405087100000003, u'mTime': 1427903539.0})) speed = 17502.0344174
DEBUG:root:currSpeed > 11071.0500067, removing index 22 
DEBUG:root:while considering point AttrDict({'speed': 103.44523976841585, u'mLongitude': -122.08622680000001, u'mLatitude': 37.390985499999999, u'mTime': 1427904260.0})(23), prev_pt (AttrDict({'speed': 0.0, u'mLongitude': -121.9504707, u'mLatitude': 37.405087100000003, u'mTime': 1427903539.0})) speed = 16774.4641535
DEBUG:root:currSpeed > 11071.0500067, removing index 23 
INFO:root:Filtering complete, removed indices = [0]
filtered list size went from (26, 8) to (24, 8)

In [302]:
ipy.inline_maps(tml, 1, 4)


Out[302]:

Section 18


In [23]:
section_18_df = ls.get_section_points(section_list[18])


final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1427901675.0}}, {'data.mTime': {'$lt': 1427904290.0}}], 'user_id': u'20150401T072115-0700_0', 'metadata.filter': u'time'} 

In [24]:
with_speeds_18_df = ls.add_speed(ls.filter_accuracy(section_18_df))


filtering points Int64Index([2, 4, 7, 10, 14, 21, 22, 23, 26, 38, 48, 49, 52, 54], dtype='int64')
filtered list size went from (78, 8) to (64, 8)

In [25]:
ipy.inline_map(lo.get_map(with_speeds_18_df))


Out[25]:

In [26]:
with_speeds_18_df[abs(with_speeds_18_df.mLongitude + 122.46) < 0.01]


Out[26]:
filter formatted_time idx key mAccuracy mLatitude mLongitude mTime distance speed
0 time 1970-01-17 04:38:21.707000 1 background/location 25 37.779109 -122.469279 1427901707 0.000000 0.000000
3 time 1970-01-17 04:38:21.893000 6 background/location 27 37.779109 -122.469280 1427901893 24681.006134 262563.895046
5 time 1970-01-17 04:38:21.979000 9 background/location 22 37.779111 -122.469279 1427901979 26313.509634 438558.493894
6 time 1970-01-17 04:38:22.013000 10 background/location 25 37.779109 -122.469279 1427902013 0.181352 5.333876
18 time 1970-01-17 04:38:22.610000 28 background/location 29 37.779108 -122.469279 1427902610 39700.054778 684483.703065
19 time 1970-01-17 04:38:22.640000 29 background/location 29 37.779108 -122.469279 1427902640 0.000000 0.000000
23 time 1970-01-17 04:38:22.820000 33 background/location 25 37.779109 -122.469279 1427902820 44025.250452 1467508.348411
24 time 1970-01-17 04:38:22.848000 34 background/location 23 37.779110 -122.469279 1427902848 0.145618 5.200648
25 time 1970-01-17 04:38:22.880000 35 background/location 24 37.779109 -122.469279 1427902880 0.106070 3.314700
26 time 1970-01-17 04:38:22.911000 36 background/location 24 37.779108 -122.469278 1427902911 0.173493 5.596545
27 time 1970-01-17 04:38:22.939000 37 background/location 25 37.779109 -122.469279 1427902939 0.160723 5.740104
28 time 1970-01-17 04:38:22.970000 38 background/location 25 37.779109 -122.469279 1427902970 0.047825 1.542748

In [27]:
section_18_iqr_threshold = cso.BoxplotOutlier(ignore_zeros=True).get_threshold(with_speeds_18_df)


DEBUG:root:quartile values are 0.25      508.786242
0.75    34515.555837
Name: speed, dtype: float64
DEBUG:root:iqr 34006.7695946

In [28]:
section_18_iqr_threshold_low = cso.BoxplotOutlier(ignore_zeros=True).get_lower_threshold(with_speeds_18_df)


DEBUG:root:quartile values are 0.25      508.786242
0.75    34515.555837
Name: speed, dtype: float64
DEBUG:root:iqr 34006.7695946

In [29]:
(section_18_iqr_threshold, section_18_iqr_threshold_low)


Out[29]:
(136535.86462075575, -101511.52254156614)

In [270]:
section_18_speedThresholdMap = with_speeds_18_df.speed > section_18_iqr_threshold

In [271]:
with_speeds_18_df[section_18_speedThresholdMap]


Out[271]:
filter formatted_time idx key mAccuracy mLatitude mLongitude mTime distance speed
1 time 1970-01-17 04:38:21.743000 2 background/location 36.000000 37.587769 -122.362285 1427901743 23266.370435 646288.067650
3 time 1970-01-17 04:38:21.893000 6 background/location 27.000000 37.779109 -122.469280 1427901893 24681.006134 262563.895046
4 time 1970-01-17 04:38:21.919000 7 background/location 27.000000 37.571086 -122.326752 1427901919 26313.381291 1012053.126559
5 time 1970-01-17 04:38:21.979000 9 background/location 22.000000 37.779111 -122.469279 1427901979 26313.509634 438558.493894
7 time 1970-01-17 04:38:22.070000 12 background/location 91.500000 37.549345 -122.307391 1427902070 29253.881020 513225.982799
18 time 1970-01-17 04:38:22.610000 28 background/location 29.000000 37.779108 -122.469279 1427902610 39700.054778 684483.703065
20 time 1970-01-17 04:38:22.731000 30 background/location 73.831001 37.455289 -122.183154 1427902731 43950.158899 482968.779112
23 time 1970-01-17 04:38:22.820000 33 background/location 25.000000 37.779109 -122.469279 1427902820 44025.250452 1467508.348411
29 time 1970-01-17 04:38:23.028000 40 background/location 39.000000 37.442819 -122.165030 1427903028 46005.696406 793201.662169
40 time 1970-01-17 04:38:23.539000 54 background/location 25.000000 37.405087 -121.950471 1427903539 11182.049570 192793.958103
41 time 1970-01-17 04:38:23.601000 56 background/location 36.000000 37.394912 -122.077896 1427903601 11312.812754 182464.721839

In [272]:
section_18_segmentation_points = with_speeds_18_df[section_18_speedThresholdMap].index

In [273]:
section_18_segmentation_points


Out[273]:
Int64Index([1, 3, 4, 5, 7, 18, 20, 23, 29, 40, 41], dtype='int64')

In [274]:
# Bracket the outlier positions with the first row (0) and the last row of the
# mask so that consecutive pairs cover the whole section.
# Both lengths come from the section 18 variables rather than from
# unprefixed names left over from other cells.
section_18_segmentation_points = section_18_segmentation_points.insert(0,0)
section_18_segmentation_points = section_18_segmentation_points.insert(
    len(section_18_segmentation_points), len(section_18_speedThresholdMap) - 1)

In [275]:
section_18_segmentation_points


Out[275]:
Int64Index([0, 1, 3, 4, 5, 7, 18, 20, 23, 29, 40, 41, 63], dtype='int64')

In [276]:
zip(section_18_segmentation_points, section_18_segmentation_points[1:])


Out[276]:
[(0, 1),
 (1, 3),
 (3, 4),
 (4, 5),
 (5, 7),
 (7, 18),
 (18, 20),
 (20, 23),
 (23, 29),
 (29, 40),
 (40, 41),
 (41, 63)]

In [277]:
import emission.analysis.point_features as pf

In [278]:
# For every segment between consecutive section 18 segmentation points:
#   - recompute distance/speed inside the segment and report the first row whose
#     recomputed distance exceeds 100 as a candidate re-break point
#   - record the distance between the first and last rows of the slice
#     (the slice excludes the `end` row).
segment_distance_list = []
for (start, end) in zip(section_18_segmentation_points, section_18_segmentation_points[1:]):
    currSegment = with_speeds_18_df[start:end]
    recalcSegment = ls.add_speed(currSegment.drop('speed', axis=1).drop('distance', axis=1))
    # Evaluate the "large jump" filter once and reuse it for the test and the message.
    jumpIndices = recalcSegment[recalcSegment.distance > 100].index
    if len(jumpIndices) > 0:
        print ("From %s to %s, re-break at %s" % (start, end, start + jumpIndices[0]))
    # Compute the end-to-end distance once; it is both printed and recorded.
    currDistance = pf.calDistance(currSegment.iloc[0], currSegment.iloc[-1])
    print ("From %s to %s, distance is %s" % (start, end, currDistance))
    segment_distance_list.append(currDistance)


From 0 to 1, distance is 0.0
From 1 to 3, re-break at 2
From 1 to 3, distance is 1748.77650602
From 3 to 4, distance is 0.0
From 4 to 5, distance is 0.0
From 5 to 7, distance is 0.181351790572
From 7 to 18, re-break at 8
From 7 to 18, distance is 10697.0211474
From 18 to 20, distance is 0.0
From 20 to 23, distance is 77.0287528643
From 23 to 29, distance is 0.0478251850624
From 29 to 40, re-break at 31
From 29 to 40, distance is 9539.80014176
From 40 to 41, distance is 0.0
From 41 to 63, re-break at 45
From 41 to 63, distance is 854.106845725

In [280]:
with_speeds_18_df[0:10]


Out[280]:
filter formatted_time idx key mAccuracy mLatitude mLongitude mTime distance speed
0 time 1970-01-17 04:38:21.707000 1 background/location 25.0 37.779109 -122.469279 1427901707 0.000000 0.000000
1 time 1970-01-17 04:38:21.743000 2 background/location 36.0 37.587769 -122.362285 1427901743 23266.370435 646288.067650
2 time 1970-01-17 04:38:21.799000 4 background/location 49.5 37.580165 -122.344912 1427901799 1748.776506 31228.151893
3 time 1970-01-17 04:38:21.893000 6 background/location 27.0 37.779109 -122.469280 1427901893 24681.006134 262563.895046
4 time 1970-01-17 04:38:21.919000 7 background/location 27.0 37.571086 -122.326752 1427901919 26313.381291 1012053.126559
5 time 1970-01-17 04:38:21.979000 9 background/location 22.0 37.779111 -122.469279 1427901979 26313.509634 438558.493894
6 time 1970-01-17 04:38:22.013000 10 background/location 25.0 37.779109 -122.469279 1427902013 0.181352 5.333876
7 time 1970-01-17 04:38:22.070000 12 background/location 91.5 37.549345 -122.307391 1427902070 29253.881020 513225.982799
8 time 1970-01-17 04:38:22.130000 13 background/location 41.0 37.535264 -122.295056 1427902130 1906.282413 31771.373552
9 time 1970-01-17 04:38:22.160000 14 background/location 34.0 37.529696 -122.286909 1427902160 948.421575 31614.052488

In [40]:
segment_distance_list


Out[40]:
[0.0,
 1748.776506019805,
 0.0,
 0.0,
 0.181351790571639,
 10697.021147400867,
 0.0,
 77.02875286433768,
 0.0478251850624157,
 9539.800141756501,
 0.0,
 854.1068457249145]

In [41]:
# Rows of section 18 whose speed exceeds the IQR threshold. This uses the mask
# defined for section 18 rather than an unprefixed leftover name.
with_speeds_18_df[section_18_speedThresholdMap]


Out[41]:
filter formatted_time idx key mAccuracy mLatitude mLongitude mTime distance speed
1 time 1970-01-17 04:38:21.743000 2 background/location 36.000000 37.587769 -122.362285 1427901743 23266.370435 646288.067650
3 time 1970-01-17 04:38:21.893000 6 background/location 27.000000 37.779109 -122.469280 1427901893 24681.006134 262563.895046
4 time 1970-01-17 04:38:21.919000 7 background/location 27.000000 37.571086 -122.326752 1427901919 26313.381291 1012053.126559
5 time 1970-01-17 04:38:21.979000 9 background/location 22.000000 37.779111 -122.469279 1427901979 26313.509634 438558.493894
7 time 1970-01-17 04:38:22.070000 12 background/location 91.500000 37.549345 -122.307391 1427902070 29253.881020 513225.982799
18 time 1970-01-17 04:38:22.610000 28 background/location 29.000000 37.779108 -122.469279 1427902610 39700.054778 684483.703065
20 time 1970-01-17 04:38:22.731000 30 background/location 73.831001 37.455289 -122.183154 1427902731 43950.158899 482968.779112
23 time 1970-01-17 04:38:22.820000 33 background/location 25.000000 37.779109 -122.469279 1427902820 44025.250452 1467508.348411
29 time 1970-01-17 04:38:23.028000 40 background/location 39.000000 37.442819 -122.165030 1427903028 46005.696406 793201.662169
40 time 1970-01-17 04:38:23.539000 54 background/location 25.000000 37.405087 -121.950471 1427903539 11182.049570 192793.958103
41 time 1970-01-17 04:38:23.601000 56 background/location 36.000000 37.394912 -122.077896 1427903601 11312.812754 182464.721839

In [238]:
# True for rows whose longitude is within 0.01 of -122.46 or within 0.01 of
# -121.95; these are the rows removed (via logical_not) in the cells below.
ground_truth_mask = np.logical_or(abs(with_speeds_18_df.mLongitude + 122.46) < 0.01, abs(with_speeds_18_df.mLongitude + 121.95) < 0.01)

In [239]:
np.nonzero(ground_truth_mask)


Out[239]:
(array([ 0,  3,  5,  6, 18, 19, 23, 24, 25, 26, 27, 28, 40]),)

In [145]:
import numpy as np

In [146]:
# Draw the section on a map, leaving out every row flagged by ground_truth_mask.
ipy.inline_map(lo.get_map(with_speeds_18_df[~ground_truth_mask]))


Out[146]:

In [148]:
# Same inspection as in the earlier np.nonzero cell.
# NOTE(review): the saved output below (execution 148) lists different
# positions than the output saved for execution 239, although the code is
# identical. Presumably the mask was built from a different frame at that
# point; re-run to refresh.
np.nonzero(ground_truth_mask)


Out[148]:
(array([ 0,  5,  8,  9, 27, 28, 32, 33, 34, 35, 36, 37, 53]),)

In [240]:
# Pipeline, innermost first: filter_accuracy on the raw section, drop the rows
# flagged by ground_truth_mask, then add speed, heading and heading change.
after_all_filtering_section_18 = ls.add_heading_change(
    ls.add_heading(
        ls.add_speed(
            ls.filter_accuracy(section_18_df)[~ground_truth_mask]
        )
    )
)


filtering points Int64Index([2, 4, 7, 10, 14, 21, 22, 23, 26, 38, 48, 49, 52, 54], dtype='int64')
filtered list size went from (78, 8) to (64, 8)

In [241]:
# Display the BoxplotOutlier (ignore_zeros=True) threshold for the fully
# filtered section. The DEBUG lines below report the quartiles and IQR it used.
cso.BoxplotOutlier(ignore_zeros=True).get_threshold(after_all_filtering_section_18)


DEBUG:root:quartile values are 0.25     1221.404373
0.75    30223.256567
Name: speed, dtype: float64
DEBUG:root:iqr 29001.8521937
Out[241]:
117228.81314806046

In [243]:
# Bar chart of the distance column of the fully filtered section.
after_all_filtering_section_18["distance"].plot(kind="bar", figsize=(20, 6))


Out[243]:
<matplotlib.axes.AxesSubplot at 0x110326190>

In [46]:
# Number of entries in the speedThresholdMap mask.
len(speedThresholdMap)


Out[46]:
64

In [47]:
# Force the first and last entries of the mask to True.
# This mutates speedThresholdMap in place, so outputs of cells that displayed
# the mask before this one ran no longer reflect its current contents.
speedThresholdMap.iloc[[0, -1]] = True

In [48]:
# Show the first and last few entries of the mask as a (head, tail) tuple.
speedThresholdMap.head(), speedThresholdMap.tail()


Out[48]:
(0     True
 1     True
 2    False
 3     True
 4     True
 Name: speed, dtype: bool, 59    False
 60    False
 61    False
 62    False
 63     True
 Name: speed, dtype: bool)

In [49]:
# Run the SmoothBoundary jump filter over the section, with
# section_18_iqr_threshold as maxSpeed. The DEBUG output below traces the
# retain/remove decision for each point.
# NOTE(review): the closing INFO line labels its list "removed indices", but
# the indices it lists are the ones the DEBUG lines report as retained.
fa = cjs.SmoothBoundary(maxSpeed = section_18_iqr_threshold)
fa.filter(with_speeds_18_df)


DEBUG:root:while considering point AttrDict({'speed': 646288.06765043165, u'mLongitude': -122.3622847, u'mLatitude': 37.587768500000003, u'mTime': 1427901743.0})(1), prev_pt (AttrDict({'speed': 0.0, u'mLongitude': -122.46927909999999, u'mLatitude': 37.779109200000001, u'mTime': 1427901707.0})) speed = 646288.06765
DEBUG:root:currSpeed > 136535.864621, removing index 1 
DEBUG:root:while considering point AttrDict({'speed': 31228.151893210805, u'mLongitude': -122.3449122, u'mLatitude': 37.580165100000002, u'mTime': 1427901799.0})(2), prev_pt (AttrDict({'speed': 0.0, u'mLongitude': -122.46927909999999, u'mLatitude': 37.779109200000001, u'mTime': 1427901707.0})) speed = 268271.918597
DEBUG:root:currSpeed > 136535.864621, removing index 2 
DEBUG:root:while considering point AttrDict({'speed': 262563.89504566987, u'mLongitude': -122.46927959999999, u'mLatitude': 37.779108899999997, u'mTime': 1427901893.0})(3), prev_pt (AttrDict({'speed': 0.0, u'mLongitude': -122.46927909999999, u'mLatitude': 37.779109200000001, u'mTime': 1427901707.0})) speed = 0.2966153195
DEBUG:root:currSpeed < 136535.864621, retaining index 3 
DEBUG:root:while considering point AttrDict({'speed': 1012053.126558671, u'mLongitude': -122.3267523, u'mLatitude': 37.571085799999999, u'mTime': 1427901919.0})(4), prev_pt (AttrDict({'speed': 262563.89504566987, u'mLongitude': -122.46927959999999, u'mLatitude': 37.779108899999997, u'mTime': 1427901893.0})) speed = 1012053.12656
DEBUG:root:currSpeed > 136535.864621, removing index 4 
DEBUG:root:while considering point AttrDict({'speed': 438558.49389392417, u'mLongitude': -122.4692787, u'mLatitude': 37.779110600000003, u'mTime': 1427901979.0})(5), prev_pt (AttrDict({'speed': 262563.89504566987, u'mLongitude': -122.46927959999999, u'mLatitude': 37.779108899999997, u'mTime': 1427901893.0})) speed = 2.38270737882
DEBUG:root:currSpeed < 136535.864621, retaining index 5 
DEBUG:root:while considering point AttrDict({'speed': 5.3338761932834995, u'mLongitude': -122.46927909999999, u'mLatitude': 37.779108999999998, u'mTime': 1427902013.0})(6), prev_pt (AttrDict({'speed': 438558.49389392417, u'mLongitude': -122.4692787, u'mLatitude': 37.779110600000003, u'mTime': 1427901979.0})) speed = 5.33387619328
DEBUG:root:currSpeed < 136535.864621, retaining index 6 
DEBUG:root:while considering point AttrDict({'speed': 513225.98279867985, u'mLongitude': -122.30739060000001, u'mLatitude': 37.549344599999998, u'mTime': 1427902070.0})(7), prev_pt (AttrDict({'speed': 5.3338761932834995, u'mLongitude': -122.46927909999999, u'mLatitude': 37.779108999999998, u'mTime': 1427902013.0})) speed = 513225.982799
DEBUG:root:currSpeed > 136535.864621, removing index 7 
DEBUG:root:while considering point AttrDict({'speed': 31771.373552087789, u'mLongitude': -122.295056, u'mLatitude': 37.535264400000003, u'mTime': 1427902130.0})(8), prev_pt (AttrDict({'speed': 5.3338761932834995, u'mLongitude': -122.46927909999999, u'mLatitude': 37.779108999999998, u'mTime': 1427902013.0})) speed = 266250.900829
DEBUG:root:currSpeed > 136535.864621, removing index 8 
DEBUG:root:while considering point AttrDict({'speed': 31614.052487621935, u'mLongitude': -122.2869087, u'mLatitude': 37.529696000000001, u'mTime': 1427902160.0})(9), prev_pt (AttrDict({'speed': 5.3338761932834995, u'mLongitude': -122.46927909999999, u'mLatitude': 37.779108999999998, u'mTime': 1427902013.0})) speed = 217995.193387
DEBUG:root:currSpeed > 136535.864621, removing index 9 
DEBUG:root:while considering point AttrDict({'speed': 34322.103754947937, u'mLongitude': -122.2704401, u'mLatitude': 37.516134399999999, u'mTime': 1427902221.0})(10), prev_pt (AttrDict({'speed': 5.3338761932834995, u'mLongitude': -122.46927909999999, u'mLatitude': 37.779108999999998, u'mTime': 1427902013.0})) speed = 163851.902694
DEBUG:root:currSpeed > 136535.864621, removing index 10 
DEBUG:root:while considering point AttrDict({'speed': 37117.502645151326, u'mLongitude': -122.2613273, u'mLatitude': 37.509695499999999, u'mTime': 1427902250.0})(11), prev_pt (AttrDict({'speed': 5.3338761932834995, u'mLongitude': -122.46927909999999, u'mLatitude': 37.779108999999998, u'mTime': 1427902013.0})) speed = 148141.293967
DEBUG:root:currSpeed > 136535.864621, removing index 11 
DEBUG:root:while considering point AttrDict({'speed': 34672.485032173689, u'mLongitude': -122.25502880000001, u'mLatitude': 37.501420699999997, u'mTime': 1427902281.0})(12), prev_pt (AttrDict({'speed': 5.3338761932834995, u'mLongitude': -122.46927909999999, u'mLatitude': 37.779108999999998, u'mTime': 1427902013.0})) speed = 135016.135777
DEBUG:root:currSpeed < 136535.864621, retaining index 12 
DEBUG:root:while considering point AttrDict({'speed': 32079.13158777625, u'mLongitude': -122.24740540000001, u'mLatitude': 37.495640100000003, u'mTime': 1427902310.0})(13), prev_pt (AttrDict({'speed': 34672.485032173689, u'mLongitude': -122.25502880000001, u'mLatitude': 37.501420699999997, u'mTime': 1427902281.0})) speed = 32079.1315878
DEBUG:root:currSpeed < 136535.864621, retaining index 13 
DEBUG:root:while considering point AttrDict({'speed': 36415.275104288587, u'mLongitude': -122.2383416, u'mLatitude': 37.4884743, u'mTime': 1427902341.0})(14), prev_pt (AttrDict({'speed': 32079.13158777625, u'mLongitude': -122.24740540000001, u'mLatitude': 37.495640100000003, u'mTime': 1427902310.0})) speed = 36415.2751043
DEBUG:root:currSpeed < 136535.864621, retaining index 14 
DEBUG:root:while considering point AttrDict({'speed': 16469.845897017054, u'mLongitude': -122.23347, u'mLatitude': 37.486971799999999, u'mTime': 1427902369.0})(15), prev_pt (AttrDict({'speed': 36415.275104288587, u'mLongitude': -122.2383416, u'mLatitude': 37.4884743, u'mTime': 1427902341.0})) speed = 16469.845897
DEBUG:root:currSpeed < 136535.864621, retaining index 15 
DEBUG:root:while considering point AttrDict({'speed': 1986.2223377484177, u'mLongitude': -122.230987, u'mLatitude': 37.486083200000003, u'mTime': 1427902490.0})(16), prev_pt (AttrDict({'speed': 16469.845897017054, u'mLongitude': -122.23347, u'mLatitude': 37.486971799999999, u'mTime': 1427902369.0})) speed = 1986.22233775
DEBUG:root:currSpeed < 136535.864621, retaining index 16 
DEBUG:root:while considering point AttrDict({'speed': 15426.33815939046, u'mLongitude': -122.2237678, u'mLatitude': 37.4796671, u'mTime': 1427902552.0})(17), prev_pt (AttrDict({'speed': 1986.2223377484177, u'mLongitude': -122.230987, u'mLatitude': 37.486083200000003, u'mTime': 1427902490.0})) speed = 15426.3381594
DEBUG:root:currSpeed < 136535.864621, retaining index 17 
DEBUG:root:while considering point AttrDict({'speed': 684483.7030653999, u'mLongitude': -122.4692793, u'mLatitude': 37.779108000000001, u'mTime': 1427902610.0})(18), prev_pt (AttrDict({'speed': 15426.33815939046, u'mLongitude': -122.2237678, u'mLatitude': 37.4796671, u'mTime': 1427902552.0})) speed = 684483.703065
DEBUG:root:currSpeed > 136535.864621, removing index 18 
DEBUG:root:while considering point AttrDict({'speed': 0.0, u'mLongitude': -122.4692793, u'mLatitude': 37.779108000000001, u'mTime': 1427902640.0})(19), prev_pt (AttrDict({'speed': 15426.33815939046, u'mLongitude': -122.2237678, u'mLatitude': 37.4796671, u'mTime': 1427902552.0})) speed = 451136.986111
DEBUG:root:currSpeed > 136535.864621, removing index 19 
DEBUG:root:while considering point AttrDict({'speed': 482968.77911194827, u'mLongitude': -122.18315370000001, u'mLatitude': 37.455288600000003, u'mTime': 1427902731.0})(20), prev_pt (AttrDict({'speed': 15426.33815939046, u'mLongitude': -122.2237678, u'mLatitude': 37.4796671, u'mTime': 1427902552.0})) speed = 25106.2681542
DEBUG:root:currSpeed < 136535.864621, retaining index 20 
DEBUG:root:while considering point AttrDict({'speed': 2852.9167727532472, u'mLongitude': -122.1825055, u'mLatitude': 37.454824799999997, u'mTime': 1427902758.0})(21), prev_pt (AttrDict({'speed': 482968.77911194827, u'mLongitude': -122.18315370000001, u'mLatitude': 37.455288600000003, u'mTime': 1427902731.0})) speed = 2852.91677275
DEBUG:root:currSpeed < 136535.864621, retaining index 21 
DEBUG:root:while considering point AttrDict({'speed': 0.0, u'mLongitude': -122.1825055, u'mLatitude': 37.454824799999997, u'mTime': 1427902790.0})(22), prev_pt (AttrDict({'speed': 2852.9167727532472, u'mLongitude': -122.1825055, u'mLatitude': 37.454824799999997, u'mTime': 1427902758.0})) speed = 0.0
DEBUG:root:currSpeed < 136535.864621, retaining index 22 
DEBUG:root:while considering point AttrDict({'speed': 1467508.348411256, u'mLongitude': -122.4692793, u'mLatitude': 37.779108600000001, u'mTime': 1427902820.0})(23), prev_pt (AttrDict({'speed': 0.0, u'mLongitude': -122.1825055, u'mLatitude': 37.454824799999997, u'mTime': 1427902790.0})) speed = 1467508.34841
DEBUG:root:currSpeed > 136535.864621, removing index 23 
DEBUG:root:while considering point AttrDict({'speed': 5.2006481759310921, u'mLongitude': -122.4692795, u'mLatitude': 37.779109900000002, u'mTime': 1427902848.0})(24), prev_pt (AttrDict({'speed': 0.0, u'mLongitude': -122.1825055, u'mLatitude': 37.454824799999997, u'mTime': 1427902790.0})) speed = 759058.255651
DEBUG:root:currSpeed > 136535.864621, removing index 24 
DEBUG:root:while considering point AttrDict({'speed': 3.3146995124403018, u'mLongitude': -122.46927909999999, u'mLatitude': 37.779108999999998, u'mTime': 1427902880.0})(25), prev_pt (AttrDict({'speed': 0.0, u'mLongitude': -122.1825055, u'mLatitude': 37.454824799999997, u'mTime': 1427902790.0})) speed = 489169.741513
DEBUG:root:currSpeed > 136535.864621, removing index 25 
DEBUG:root:while considering point AttrDict({'speed': 5.5965447577157184, u'mLongitude': -122.46927770000001, u'mLatitude': 37.7791079, u'mTime': 1427902911.0})(26), prev_pt (AttrDict({'speed': 0.0, u'mLongitude': -122.1825055, u'mLatitude': 37.454824799999997, u'mTime': 1427902790.0})) speed = 363843.850551
DEBUG:root:currSpeed > 136535.864621, removing index 26 
DEBUG:root:while considering point AttrDict({'speed': 5.7401040445311615, u'mLongitude': -122.4692793, u'mLatitude': 37.779108600000001, u'mTime': 1427902939.0})(27), prev_pt (AttrDict({'speed': 0.0, u'mLongitude': -122.1825055, u'mLatitude': 37.454824799999997, u'mTime': 1427902790.0})) speed = 295471.479546
DEBUG:root:currSpeed > 136535.864621, removing index 27 
DEBUG:root:while considering point AttrDict({'speed': 1.5427479052392161, u'mLongitude': -122.46927909999999, u'mLatitude': 37.779108999999998, u'mTime': 1427902970.0})(28), prev_pt (AttrDict({'speed': 0.0, u'mLongitude': -122.1825055, u'mLatitude': 37.454824799999997, u'mTime': 1427902790.0})) speed = 244584.870757
DEBUG:root:currSpeed > 136535.864621, removing index 28 
DEBUG:root:while considering point AttrDict({'speed': 793201.66216874053, u'mLongitude': -122.1650301, u'mLatitude': 37.442818899999999, u'mTime': 1427903028.0})(29), prev_pt (AttrDict({'speed': 0.0, u'mLongitude': -122.1825055, u'mLatitude': 37.454824799999997, u'mTime': 1427902790.0})) speed = 8571.91963977
DEBUG:root:currSpeed < 136535.864621, retaining index 29 
DEBUG:root:while considering point AttrDict({'speed': 1221.4043734013819, u'mLongitude': -122.1654738, u'mLatitude': 37.442904300000002, u'mTime': 1427903061.0})(30), prev_pt (AttrDict({'speed': 793201.66216874053, u'mLongitude': -122.1650301, u'mLatitude': 37.442818899999999, u'mTime': 1427903028.0})) speed = 1221.4043734
DEBUG:root:currSpeed < 136535.864621, retaining index 30 
DEBUG:root:while considering point AttrDict({'speed': 17587.217001654444, u'mLongitude': -122.15539269999999, u'mLatitude': 37.438107000000002, u'mTime': 1427903120.0})(31), prev_pt (AttrDict({'speed': 1221.4043734013819, u'mLongitude': -122.1654738, u'mLatitude': 37.442904300000002, u'mTime': 1427903061.0})) speed = 17587.2170017
DEBUG:root:currSpeed < 136535.864621, retaining index 31 
DEBUG:root:while considering point AttrDict({'speed': 20825.418010424219, u'mLongitude': -122.1495982, u'mLatitude': 37.434565900000003, u'mTime': 1427903151.0})(32), prev_pt (AttrDict({'speed': 17587.217001654444, u'mLongitude': -122.15539269999999, u'mLatitude': 37.438107000000002, u'mTime': 1427903120.0})) speed = 20825.4180104
DEBUG:root:currSpeed < 136535.864621, retaining index 32 
DEBUG:root:while considering point AttrDict({'speed': 33090.450784165987, u'mLongitude': -122.1403743, u'mLatitude': 37.428957199999999, u'mTime': 1427903182.0})(33), prev_pt (AttrDict({'speed': 20825.418010424219, u'mLongitude': -122.1495982, u'mLatitude': 37.434565900000003, u'mTime': 1427903151.0})) speed = 33090.4507842
DEBUG:root:currSpeed < 136535.864621, retaining index 33 
DEBUG:root:while considering point AttrDict({'speed': 33260.964194249245, u'mLongitude': -122.1319419, u'mLatitude': 37.4234431, u'mTime': 1427903211.0})(34), prev_pt (AttrDict({'speed': 33090.450784165987, u'mLongitude': -122.1403743, u'mLatitude': 37.428957199999999, u'mTime': 1427903182.0})) speed = 33260.9641942
DEBUG:root:currSpeed < 136535.864621, retaining index 34 
DEBUG:root:while considering point AttrDict({'speed': 30223.256567066153, u'mLongitude': -122.10750520000001, u'mLatitude': 37.407248600000003, u'mTime': 1427903304.0})(35), prev_pt (AttrDict({'speed': 33260.964194249245, u'mLongitude': -122.1319419, u'mLatitude': 37.4234431, u'mTime': 1427903211.0})) speed = 30223.2565671
DEBUG:root:currSpeed < 136535.864621, retaining index 35 
DEBUG:root:while considering point AttrDict({'speed': 37015.955547491591, u'mLongitude': -122.09734520000001, u'mLatitude': 37.4032926, u'mTime': 1427903331.0})(36), prev_pt (AttrDict({'speed': 30223.256567066153, u'mLongitude': -122.10750520000001, u'mLatitude': 37.407248600000003, u'mTime': 1427903304.0})) speed = 37015.9555475
DEBUG:root:currSpeed < 136535.864621, retaining index 36 
DEBUG:root:while considering point AttrDict({'speed': 34515.555836903513, u'mLongitude': -122.08707510000001, u'mLatitude': 37.399488699999999, u'mTime': 1427903360.0})(37), prev_pt (AttrDict({'speed': 37015.955547491591, u'mLongitude': -122.09734520000001, u'mLatitude': 37.4032926, u'mTime': 1427903331.0})) speed = 34515.5558369
DEBUG:root:currSpeed < 136535.864621, retaining index 37 
DEBUG:root:while considering point AttrDict({'speed': 12487.31058299685, u'mLongitude': -122.07648709999999, u'mLatitude': 37.394090499999997, u'mTime': 1427903449.0})(38), prev_pt (AttrDict({'speed': 34515.555836903513, u'mLongitude': -122.08707510000001, u'mLatitude': 37.399488699999999, u'mTime': 1427903360.0})) speed = 12487.310583
DEBUG:root:currSpeed < 136535.864621, retaining index 38 
DEBUG:root:while considering point AttrDict({'speed': 842.63439905412588, u'mLongitude': -122.07627359999999, u'mLatitude': 37.393917199999997, u'mTime': 1427903481.0})(39), prev_pt (AttrDict({'speed': 12487.31058299685, u'mLongitude': -122.07648709999999, u'mLatitude': 37.394090499999997, u'mTime': 1427903449.0})) speed = 842.634399054
DEBUG:root:currSpeed < 136535.864621, retaining index 39 
DEBUG:root:while considering point AttrDict({'speed': 192793.95810271829, u'mLongitude': -121.9504707, u'mLatitude': 37.405087100000003, u'mTime': 1427903539.0})(40), prev_pt (AttrDict({'speed': 842.63439905412588, u'mLongitude': -122.07627359999999, u'mLatitude': 37.393917199999997, u'mTime': 1427903481.0})) speed = 192793.958103
DEBUG:root:currSpeed > 136535.864621, removing index 40 
DEBUG:root:while considering point AttrDict({'speed': 182464.72183862209, u'mLongitude': -122.077896, u'mLatitude': 37.394912400000003, u'mTime': 1427903601.0})(41), prev_pt (AttrDict({'speed': 842.63439905412588, u'mLongitude': -122.07627359999999, u'mLatitude': 37.393917199999997, u'mTime': 1427903481.0})) speed = 1508.9549155
DEBUG:root:currSpeed < 136535.864621, retaining index 41 
DEBUG:root:while considering point AttrDict({'speed': 2269.4991250420549, u'mLongitude': -122.0785217, u'mLatitude': 37.394554900000003, u'mTime': 1427903631.0})(42), prev_pt (AttrDict({'speed': 182464.72183862209, u'mLongitude': -122.077896, u'mLatitude': 37.394912400000003, u'mTime': 1427903601.0})) speed = 2269.49912504
DEBUG:root:currSpeed < 136535.864621, retaining index 42 
DEBUG:root:while considering point AttrDict({'speed': 2642.5190727328054, u'mLongitude': -122.0793295, u'mLatitude': 37.394244399999998, u'mTime': 1427903661.0})(43), prev_pt (AttrDict({'speed': 2269.4991250420549, u'mLongitude': -122.0785217, u'mLatitude': 37.394554900000003, u'mTime': 1427903631.0})) speed = 2642.51907273
DEBUG:root:currSpeed < 136535.864621, retaining index 43 
DEBUG:root:while considering point AttrDict({'speed': 2871.0143623852427, u'mLongitude': -122.0802595, u'mLatitude': 37.394123, u'mTime': 1427903690.0})(44), prev_pt (AttrDict({'speed': 2642.5190727328054, u'mLongitude': -122.0793295, u'mLatitude': 37.394244399999998, u'mTime': 1427903661.0})) speed = 2871.01436239
DEBUG:root:currSpeed < 136535.864621, retaining index 44 
DEBUG:root:while considering point AttrDict({'speed': 4649.7997229866551, u'mLongitude': -122.08100330000001, u'mLatitude': 37.3930164, u'mTime': 1427903720.0})(45), prev_pt (AttrDict({'speed': 2871.0143623852427, u'mLongitude': -122.0802595, u'mLatitude': 37.394123, u'mTime': 1427903690.0})) speed = 4649.79972299
DEBUG:root:currSpeed < 136535.864621, retaining index 45 
DEBUG:root:while considering point AttrDict({'speed': 4907.7811985571607, u'mLongitude': -122.0819102, u'mLatitude': 37.391958500000001, u'mTime': 1427903749.0})(46), prev_pt (AttrDict({'speed': 4649.7997229866551, u'mLongitude': -122.08100330000001, u'mLatitude': 37.3930164, u'mTime': 1427903720.0})) speed = 4907.78119856
DEBUG:root:currSpeed < 136535.864621, retaining index 46 
DEBUG:root:while considering point AttrDict({'speed': 508.78624228609834, u'mLongitude': -122.08207059999999, u'mLatitude': 37.391886399999997, u'mTime': 1427903781.0})(47), prev_pt (AttrDict({'speed': 4907.7811985571607, u'mLongitude': -122.0819102, u'mLatitude': 37.391958500000001, u'mTime': 1427903749.0})) speed = 508.786242286
DEBUG:root:currSpeed < 136535.864621, retaining index 47 
DEBUG:root:while considering point AttrDict({'speed': 347.34871264881514, u'mLongitude': -122.0821885, u'mLatitude': 37.391889200000001, u'mTime': 1427903811.0})(48), prev_pt (AttrDict({'speed': 508.78624228609834, u'mLongitude': -122.08207059999999, u'mLatitude': 37.391886399999997, u'mTime': 1427903781.0})) speed = 347.348712649
DEBUG:root:currSpeed < 136535.864621, retaining index 48 
DEBUG:root:while considering point AttrDict({'speed': 1018.0706715498312, u'mLongitude': -122.08199329999999, u'mLatitude': 37.3921159, u'mTime': 1427903841.0})(49), prev_pt (AttrDict({'speed': 347.34871264881514, u'mLongitude': -122.0821885, u'mLatitude': 37.391889200000001, u'mTime': 1427903811.0})) speed = 1018.07067155
DEBUG:root:currSpeed < 136535.864621, retaining index 49 
DEBUG:root:while considering point AttrDict({'speed': 146.62161075240834, u'mLongitude': -122.0819801, u'mLatitude': 37.392080499999999, u'mTime': 1427903869.0})(50), prev_pt (AttrDict({'speed': 1018.0706715498312, u'mLongitude': -122.08199329999999, u'mLatitude': 37.3921159, u'mTime': 1427903841.0})) speed = 146.621610752
DEBUG:root:currSpeed < 136535.864621, retaining index 50 
DEBUG:root:while considering point AttrDict({'speed': 2679.5341214744944, u'mLongitude': -122.08290700000001, u'mLatitude': 37.391780400000002, u'mTime': 1427903902.0})(51), prev_pt (AttrDict({'speed': 146.62161075240834, u'mLongitude': -122.0819801, u'mLatitude': 37.392080499999999, u'mTime': 1427903869.0})) speed = 2679.53412147
DEBUG:root:currSpeed < 136535.864621, retaining index 51 
DEBUG:root:while considering point AttrDict({'speed': 2868.1609178748317, u'mLongitude': -122.0832642, u'mLatitude': 37.391060500000002, u'mTime': 1427903932.0})(52), prev_pt (AttrDict({'speed': 2679.5341214744944, u'mLongitude': -122.08290700000001, u'mLatitude': 37.391780400000002, u'mTime': 1427903902.0})) speed = 2868.16091787
DEBUG:root:currSpeed < 136535.864621, retaining index 52 
DEBUG:root:while considering point AttrDict({'speed': 3312.4368387421955, u'mLongitude': -122.0843179, u'mLatitude': 37.391273699999999, u'mTime': 1427903961.0})(53), prev_pt (AttrDict({'speed': 2868.1609178748317, u'mLongitude': -122.0832642, u'mLatitude': 37.391060500000002, u'mTime': 1427903932.0})) speed = 3312.43683874
DEBUG:root:currSpeed < 136535.864621, retaining index 53 
DEBUG:root:while considering point AttrDict({'speed': 4574.18106268676, u'mLongitude': -122.0856308, u'mLatitude': 37.391762200000002, u'mTime': 1427903989.0})(54), prev_pt (AttrDict({'speed': 3312.4368387421955, u'mLongitude': -122.0843179, u'mLatitude': 37.391273699999999, u'mTime': 1427903961.0})) speed = 4574.18106269
DEBUG:root:currSpeed < 136535.864621, retaining index 54 
DEBUG:root:while considering point AttrDict({'speed': 1711.6779023059614, u'mLongitude': -122.0860721, u'mLatitude': 37.391416200000002, u'mTime': 1427904021.0})(55), prev_pt (AttrDict({'speed': 4574.18106268676, u'mLongitude': -122.0856308, u'mLatitude': 37.391762200000002, u'mTime': 1427903989.0})) speed = 1711.67790231
DEBUG:root:currSpeed < 136535.864621, retaining index 55 
DEBUG:root:while considering point AttrDict({'speed': 2108.0634398571451, u'mLongitude': -122.0863047, u'mLatitude': 37.390898399999998, u'mTime': 1427904050.0})(56), prev_pt (AttrDict({'speed': 1711.6779023059614, u'mLongitude': -122.0860721, u'mLatitude': 37.391416200000002, u'mTime': 1427904021.0})) speed = 2108.06343986
DEBUG:root:currSpeed < 136535.864621, retaining index 56 
DEBUG:root:while considering point AttrDict({'speed': 395.00621395142298, u'mLongitude': -122.086226, u'mLatitude': 37.390984699999997, u'mTime': 1427904080.0})(57), prev_pt (AttrDict({'speed': 2108.0634398571451, u'mLongitude': -122.0863047, u'mLatitude': 37.390898399999998, u'mTime': 1427904050.0})) speed = 395.006213951
DEBUG:root:currSpeed < 136535.864621, retaining index 57 
DEBUG:root:while considering point AttrDict({'speed': 50.792677233945582, u'mLongitude': -122.08622560000001, u'mLatitude': 37.390998400000001, u'mTime': 1427904110.0})(58), prev_pt (AttrDict({'speed': 395.00621395142298, u'mLongitude': -122.086226, u'mLatitude': 37.390984699999997, u'mTime': 1427904080.0})) speed = 50.7926772339
DEBUG:root:currSpeed < 136535.864621, retaining index 58 
DEBUG:root:while considering point AttrDict({'speed': 8.7061269363542912, u'mLongitude': -122.0862262, u'mLatitude': 37.391000699999999, u'mTime': 1427904140.0})(59), prev_pt (AttrDict({'speed': 50.792677233945582, u'mLongitude': -122.08622560000001, u'mLatitude': 37.390998400000001, u'mTime': 1427904110.0})) speed = 8.70612693635
DEBUG:root:currSpeed < 136535.864621, retaining index 59 
DEBUG:root:while considering point AttrDict({'speed': 59.686235389153424, u'mLongitude': -122.08622579999999, u'mLatitude': 37.390984600000003, u'mTime': 1427904170.0})(60), prev_pt (AttrDict({'speed': 8.7061269363542912, u'mLongitude': -122.0862262, u'mLatitude': 37.391000699999999, u'mTime': 1427904140.0})) speed = 59.6862353892
DEBUG:root:currSpeed < 136535.864621, retaining index 60 
DEBUG:root:while considering point AttrDict({'speed': 18.171385360576551, u'mLongitude': -122.08622560000001, u'mLatitude': 37.390979700000003, u'mTime': 1427904200.0})(61), prev_pt (AttrDict({'speed': 59.686235389153424, u'mLongitude': -122.08622579999999, u'mLatitude': 37.390984600000003, u'mTime': 1427904170.0})) speed = 18.1713853606
DEBUG:root:currSpeed < 136535.864621, retaining index 61 
DEBUG:root:while considering point AttrDict({'speed': 124.91209178840978, u'mLongitude': -122.0862259, u'mLatitude': 37.391013399999999, u'mTime': 1427904230.0})(62), prev_pt (AttrDict({'speed': 18.171385360576551, u'mLongitude': -122.08622560000001, u'mLatitude': 37.390979700000003, u'mTime': 1427904200.0})) speed = 124.912091788
DEBUG:root:currSpeed < 136535.864621, retaining index 62 
DEBUG:root:while considering point AttrDict({'speed': 103.44523976841585, u'mLongitude': -122.08622680000001, u'mLatitude': 37.390985499999999, u'mTime': 1427904260.0})(63), prev_pt (AttrDict({'speed': 124.91209178840978, u'mLongitude': -122.0862259, u'mLatitude': 37.391013399999999, u'mTime': 1427904230.0})) speed = 103.445239768
DEBUG:root:currSpeed < 136535.864621, retaining index 63 
INFO:root:Filtering complete, removed indices = [ 0  3  5  6 12 13 14 15 16 17 20 21 22 29 30 31 32 33 34 35 36 37 38 39 41
 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63]

In [50]:
# Map the rows of with_speeds_18_df selected by fa.inlier_mask_
# (presumably the points the filter kept; confirm in jump_smoothing).
# NOTE: `fa` is rebound to a different filter in a later cell, so what this
# draws depends on which filter cell ran last.
ipy.inline_map(lo.get_map(with_speeds_18_df[fa.inlier_mask_]))


Out[50]:

In [51]:
# Same section, now through the SmoothPiecewiseRansac filter with the same
# maxSpeed. This rebinds `fa`, so any cell that reads fa.inlier_mask_ after
# this point sees this filter's result rather than SmoothBoundary's.
fa = cjs.SmoothPiecewiseRansac(maxSpeed = section_18_iqr_threshold)
fa.filter(with_speeds_18_df)


DEBUG:root:Found 11 potential outliers, list = [ 1  3  4  5  7 18 20 23 29 40 41]
DEBUG:root:Found 3 clusters with centers [[ 4]
 [23]
 [41]]
DEBUG:root:Considering candidate cluster center [4]
DEBUG:root:lowRange = max(-1, 0) = 0 and highRange = max(9, 64) = 9
DEBUG:root:Considering candidate cluster center [23]
DEBUG:root:lowRange = max(18, 0) = 18 and highRange = max(28, 64) = 28
DEBUG:root:Considering candidate cluster center [41]
DEBUG:root:lowRange = max(36, 0) = 36 and highRange = max(46, 64) = 46
DEBUG:root:Area size = 10, index = Int64Index([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype='int64') with size 10
DEBUG:root:In area 0 - 9, deleted 0 points through ransac filtering
DEBUG:root:Retain mask is of size 10
DEBUG:root:Area size = 11, index = Int64Index([18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28], dtype='int64') with size 11
DEBUG:root:In area 18 - 28, deleted 5 points through ransac filtering
DEBUG:root:Retain mask is of size 11
DEBUG:root:Area size = 11, index = Int64Index([36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46], dtype='int64') with size 11
DEBUG:root:In area 36 - 46, deleted 4 points through ransac filtering
DEBUG:root:Retain mask is of size 11
DEBUG:root:with speed df shape is (64, 10), ransac_mask size = 64
DEBUG:root:filtering done, ransac deleted points = [0]

In [52]:
# Positions at which fa.inlier_mask_ is False.
np.nonzero(np.logical_not(fa.inlier_mask_))


Out[52]:
(array([20, 21, 22, 24, 26, 40, 44, 45, 46]),)

In [53]:
# Map only the rows at positions 1 through 10 of the section.
ipy.inline_map(lo.get_map(with_speeds_18_df.iloc[1:11]))


Out[53]:

In [54]:
# Add heading, then heading change, to the section that already has speeds.
# The heading and heading_change columns are the ones plotted in later cells.
with_hcs_18_df = ls.add_heading_change(ls.add_heading(with_speeds_18_df))

In [152]:
# Count the points of the fully filtered section whose speed is above the
# BoxplotOutlier threshold for that same section. The threshold is recomputed
# here rather than pasted in as a literal from an earlier cell's output, so the
# count stays correct if the underlying data changes.
np.count_nonzero(
    after_all_filtering_section_18.speed
    > cso.BoxplotOutlier(ignore_zeros=True).get_threshold(after_all_filtering_section_18)
)


Out[152]:
0

In [55]:
# Bar chart of heading change for every point of the section.
with_hcs_18_df["heading_change"].plot(kind="bar", figsize=(20, 6))


Out[55]:
<matplotlib.axes.AxesSubplot at 0x10b74c6d0>

In [242]:
# Bar chart of heading change for the fully filtered section.
after_all_filtering_section_18["heading_change"].plot(kind="bar", figsize=(20, 6))


Out[242]:
<matplotlib.axes.AxesSubplot at 0x10fa6c610>

In [56]:
# Bar chart of heading for every point of the section.
with_hcs_18_df["heading"].plot(kind="bar", figsize=(20, 6))


Out[56]:
<matplotlib.axes.AxesSubplot at 0x10b68df90>

In [57]:
# Bar chart of heading, restricted to rows not flagged by ground_truth_mask.
with_hcs_18_df.loc[~ground_truth_mask, "heading"].plot(kind="bar", figsize=(15, 6))


Out[57]:
<matplotlib.axes.AxesSubplot at 0x10c6cc950>

In [58]:
# Bar chart of heading change, restricted to rows not flagged by ground_truth_mask.
with_hcs_18_df.loc[~ground_truth_mask, "heading_change"].plot(kind="bar", figsize=(15, 6))


Out[58]:
<matplotlib.axes.AxesSubplot at 0x10d393710>

In [59]:
# Rows whose heading change exceeds 135 in magnitude, in either direction.
with_hcs_18_df[with_hcs_18_df.heading_change.abs() > 135]


Out[59]:
filter formatted_time idx key mAccuracy mLatitude mLongitude mTime distance speed heading heading_change
3 time 1970-01-17 04:38:21.893000 6 background/location 27.000000 37.779109 -122.469280 1427901893 24681.006134 262563.895046 -26.286376 -145.192553
4 time 1970-01-17 04:38:21.919000 7 background/location 27.000000 37.571086 -122.326752 1427901919 26313.381291 1012053.126559 151.485890 177.772265
5 time 1970-01-17 04:38:21.979000 9 background/location 22.000000 37.779111 -122.469279 1427901979 26313.509634 438558.493894 -28.426652 -179.912542
6 time 1970-01-17 04:38:22.013000 10 background/location 25.000000 37.779109 -122.469279 1427902013 0.181352 5.333876 -168.822647 -140.395995
7 time 1970-01-17 04:38:22.070000 12 background/location 91.500000 37.549345 -122.307391 1427902070 29253.881020 513225.982799 150.799785 319.622432
18 time 1970-01-17 04:38:22.610000 28 background/location 29.000000 37.779108 -122.469279 1427902610 39700.054778 684483.703065 -32.922451 -171.159811
20 time 1970-01-17 04:38:22.731000 30 background/location 73.831001 37.455289 -122.183154 1427902731 43950.158899 482968.779112 144.924322 144.924322
25 time 1970-01-17 04:38:22.880000 35 background/location 24.000000 37.779109 -122.469279 1427902880 0.106070 3.314700 160.644680 167.577620
27 time 1970-01-17 04:38:22.939000 37 background/location 25.000000 37.779109 -122.469279 1427902939 0.160723 5.740104 -61.034055 -195.864499
30 time 1970-01-17 04:38:23.061000 41 background/location 78.000000 37.442904 -122.165474 1427903061 40.306344 1221.404373 -76.373059 -220.651093
31 time 1970-01-17 04:38:23.120000 42 background/location 57.000000 37.438107 -122.155393 1427903120 1037.645803 17587.217002 120.933075 197.306134
41 time 1970-01-17 04:38:23.601000 56 background/location 36.000000 37.394912 -122.077896 1427903601 11312.812754 182464.721839 -95.700947 -179.285509
51 time 1970-01-17 04:38:23.902000 66 background/location 102.927002 37.391780 -122.082907 1427903902 88.424626 2679.534121 -112.171145 -275.669027
57 time 1970-01-17 04:38:24.080000 72 background/location 37.500000 37.390985 -122.086226 1427904080 11.850186 395.006214 35.924889 196.283354
60 time 1970-01-17 04:38:24.170000 75 background/location 37.500000 37.390985 -122.086226 1427904170 1.790587 59.686235 178.869164 190.578692
62 time 1970-01-17 04:38:24.230000 77 background/location 40.500000 37.391013 -122.086226 1427904230 3.747363 124.912092 -0.405234 -178.547841
63 time 1970-01-17 04:38:24.260000 78 background/location 37.500000 37.390985 -122.086227 1427904260 3.103357 103.445240 -178.531867 -178.126633

Section from new data


In [60]:
from uuid import UUID
import attrdict as ad

In [61]:
# Hand-built section descriptor for the new data; the next cell passes it to
# ls.get_section_points to load the matching location points.
section_new_data = ad.AttrDict({
    'user_id': UUID('b0d937d0-70ef-305e-9563-440369012b39'),
    'loc_filter': 'distance',
    'start_ts': 1437667649000.0,
    'end_ts': 1437671636000.0,
    'source': 'new',
})

In [62]:
# Load the location points for section_new_data. The "final query" line
# printed below shows the database filter that was built from it.
section_new_data_df = ls.get_section_points(section_new_data)


final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1437667649000.0}}, {'data.mTime': {'$lt': 1437671636000.0}}], 'user_id': UUID('b0d937d0-70ef-305e-9563-440369012b39'), 'metadata.filter': 'distance'} 

In [63]:
import emission.core.get_database as edb

In [64]:
# Count the usercache entries with this exact data.mTime; 1437667649213 is the mTime of
# the first point (index 0) in the tables for this section further down.
edb.get_usercache_db().find({'data.mTime': 1437667649213}).count()


Out[64]:
1

In [65]:
with_speeds_new_data_df = ls.add_speed(ls.filter_accuracy(section_new_data_df))


filtering points Int64Index([2, 3, 8, 14, 16, 17, 24, 25, 36], dtype='int64')
filtered list size went from (39, 28) to (30, 28)

In [66]:
new_data_iqr_threshold = cso.BoxplotOutlier(ignore_zeros=True).get_threshold(with_speeds_new_data_df)


DEBUG:root:quartile values are 0.25     7.640136
0.75    20.105116
Name: speed, dtype: float64
DEBUG:root:iqr 12.4649802436

In [67]:
new_data_iqr_threshold


Out[67]:
57.500057136004003

In [367]:
# Display only the rows whose speed exceeds the boxplot threshold, restricted to the
# columns that matter for eyeballing the jumps.
with_speeds_new_data_df.loc[with_speeds_new_data_df.speed > new_data_iqr_threshold,
                            ["mLatitude", "mLongitude", "mAccuracy", "speed", "mTime", "formatted_time"]]


Out[367]:
mLatitude mLongitude mAccuracy speed mTime formatted_time
8 37.600321 -122.386934 33.0 147.878263 1437669334140 2015-07-23 09:35:34
9 37.727916 -122.443265 40.5 500.713177 1437669364156 2015-07-23 09:36:04
10 37.600319 -122.386934 42.0 235.091036 1437669428087 2015-07-23 09:37:08

In [368]:
section_new_data_segmentation_points = with_speeds_new_data_df[with_speeds_new_data_df.speed > new_data_iqr_threshold].index

In [369]:
# Build the segment boundaries: 0, every index whose speed is above the threshold, and
# the number of rows. The outlier indices are re-derived from the dataframe here (instead
# of extending the previous value of the variable) so that re-running this cell does not
# keep inserting additional 0 / end boundaries.
section_new_data_segmentation_points = with_speeds_new_data_df[with_speeds_new_data_df.speed > new_data_iqr_threshold].index
section_new_data_segmentation_points = section_new_data_segmentation_points.insert(0, 0)
section_new_data_segmentation_points = section_new_data_segmentation_points.insert(
    len(section_new_data_segmentation_points), with_speeds_new_data_df.shape[0])

In [370]:
zip(section_new_data_segmentation_points, section_new_data_segmentation_points[1:])


Out[370]:
[(0, 8), (8, 9), (9, 10), (10, 30)]

In [371]:
# Walk over consecutive boundary pairs and report, for each segment, how many points it
# has and the distance between its first and last point.
for (start, end) in zip(section_new_data_segmentation_points, section_new_data_segmentation_points[1:]):
    currSegment = with_speeds_new_data_df[start:end]
    # Compute the distance once and reuse it in the message below
    currDistance = pf.calDistance(currSegment.iloc[0], currSegment.iloc[-1])
    print ("From %s to %s, number of points is %s, distance is %s" % (start, end, (end-start), currDistance))


From 0 to 8, number of points is 8, distance is 14025.4359673
From 8 to 9, number of points is 1, distance is 0.0
From 9 to 10, number of points is 1, distance is 0.0
From 10 to 30, number of points is 20, distance is 31915.3776788

In [379]:
# Histogram of the first-to-last-point distance of every segment. The distances are
# computed from the data rather than pasted in from the printed output of the previous
# cell, so the plot stays correct if the segmentation changes.
pd.Series([pf.calDistance(with_speeds_new_data_df[start:end].iloc[0],
                          with_speeds_new_data_df[start:end].iloc[-1])
           for (start, end) in zip(section_new_data_segmentation_points,
                                   section_new_data_segmentation_points[1:])]).hist()


Out[379]:
<matplotlib.axes.AxesSubplot at 0x1111be1d0>

In [378]:
with_speeds_new_data_df[0:10][["mLatitude", "mLongitude", "mAccuracy", "speed", "distance", "mTime", "formatted_time"]]


Out[378]:
mLatitude mLongitude mAccuracy speed distance mTime formatted_time
0 37.598937 -122.386544 37.500 0.000000 0.000000 1437667649213 2015-07-23 09:07:29
1 37.613312 -122.400499 58.500 2.824419 2016.389432 1437668363126 2015-07-23 09:19:23
2 37.694269 -122.470619 57.000 18.469786 10915.292588 1437668954107 2015-07-23 09:29:14
3 37.699254 -122.470745 66.881 17.768932 554.372911 1437668985306 2015-07-23 09:29:45
4 37.705952 -122.468711 37.500 12.003937 765.983220 1437669049117 2015-07-23 09:30:49
5 37.710586 -122.466058 40.500 6.079706 565.692329 1437669142163 2015-07-23 09:32:22
6 37.711364 -122.453417 52.500 17.466772 1115.375653 1437669206020 2015-07-23 09:33:26
7 37.714813 -122.449480 51.000 15.055191 516.739325 1437669240343 2015-07-23 09:34:00
8 37.600321 -122.386934 33.000 147.878263 13870.537393 1437669334140 2015-07-23 09:35:34
9 37.727916 -122.443265 40.500 500.713177 15029.406709 1437669364156 2015-07-23 09:36:04

In [382]:
with_speeds_new_data_df[10:15][["mLatitude", "mLongitude", "mAccuracy", "speed", "distance", "mTime", "formatted_time"]]


Out[382]:
mLatitude mLongitude mAccuracy speed distance mTime formatted_time
10 37.600319 -122.386934 42.000 235.091036 15029.605054 1437669428087 2015-07-23 09:37:08
11 37.600321 -122.386934 33.000 0.000399 0.225165 1437669992128 2015-07-23 09:46:32
12 37.807162 -122.301494 55.500 38.963991 24196.794040 1437670613132 2015-07-23 09:56:53
13 37.805007 -122.295444 162.821 18.749856 583.045535 1437670644228 2015-07-23 09:57:24
14 37.804614 -122.294252 40.500 3.653723 113.437144 1437670675275 2015-07-23 09:57:55

In [69]:
# Boolean mask that is True for points whose mLongitude is within 0.01 of -122.38.
# NOTE(review): judging by the name, this is the hand-picked set of known-bad points for
# this section - confirm.
ground_truth_mask = abs(with_speeds_new_data_df.mLongitude + 122.38) < 0.01

In [70]:
# Un-flag the first point: its mLongitude (-122.386544) also passes the longitude test,
# but it is excluded from the mask here. Note that this modifies the mask in place.
ground_truth_mask.iloc[0] = False

In [71]:
with_speeds_new_data_df[ground_truth_mask].index


Out[71]:
Int64Index([8, 10, 11], dtype='int64')

In [72]:
ipy.inline_map(lo.get_map(with_speeds_new_data_df))


Out[72]:

In [73]:
ipy.inline_map(lo.get_map(with_speeds_new_data_df[np.logical_not(ground_truth_mask)]))


Out[73]:

In [74]:
with_speeds_new_data_df[ground_truth_mask].index


Out[74]:
Int64Index([8, 10, 11], dtype='int64')

In [75]:
with_speeds_new_data_df[5:15][["mLatitude", "mLongitude", "mAccuracy", "speed", "mTime", "formatted_time"]]


Out[75]:
mLatitude mLongitude mAccuracy speed mTime formatted_time
5 37.710586 -122.466058 40.500 6.079706 1437669142163 2015-07-23 09:32:22
6 37.711364 -122.453417 52.500 17.466772 1437669206020 2015-07-23 09:33:26
7 37.714813 -122.449480 51.000 15.055191 1437669240343 2015-07-23 09:34:00
8 37.600321 -122.386934 33.000 147.878263 1437669334140 2015-07-23 09:35:34
9 37.727916 -122.443265 40.500 500.713177 1437669364156 2015-07-23 09:36:04
10 37.600319 -122.386934 42.000 235.091036 1437669428087 2015-07-23 09:37:08
11 37.600321 -122.386934 33.000 0.000399 1437669992128 2015-07-23 09:46:32
12 37.807162 -122.301494 55.500 38.963991 1437670613132 2015-07-23 09:56:53
13 37.805007 -122.295444 162.821 18.749856 1437670644228 2015-07-23 09:57:24
14 37.804614 -122.294252 40.500 3.653723 1437670675275 2015-07-23 09:57:55

In [153]:
# Same longitude test as the earlier ground_truth_mask, but evaluated on the unfiltered
# section_new_data_df instead of with_speeds_new_data_df.
# NOTE(review): the chained assignment also rebinds ground_truth_mask, so any cell that
# indexes with_speeds_new_data_df with ground_truth_mask will pick up this mask if it is
# re-run afterwards; the first point is also not un-flagged here - confirm both are intended.
orig_ground_truth_mask_new_data = ground_truth_mask = abs(section_new_data_df.mLongitude + 122.38) < 0.01

In [156]:
after_all_filtering_section_new_data = ls.add_speed(ls.filter_accuracy(section_new_data_df[np.logical_not(orig_ground_truth_mask_new_data)]))


filtering points Int64Index([3, 8, 14, 16, 17, 24, 25, 36], dtype='int64')
filtered list size went from (34, 28) to (26, 28)

In [157]:
cso.BoxplotOutlier(ignore_zeros=True).get_threshold(after_all_filtering_section_new_data)


DEBUG:root:quartile values are 0.25     7.862146
0.75    18.181250
Name: speed, dtype: float64
DEBUG:root:iqr 10.3191044956
Out[157]:
49.138563586884288

In [158]:
# Count the points that are still above the boxplot threshold after all filtering.
# The threshold is recomputed here rather than pasted in from the previous output, so the
# count stays correct if the data or the filtering changes.
np.count_nonzero(after_all_filtering_section_new_data.speed >
                 cso.BoxplotOutlier(ignore_zeros=True).get_threshold(after_all_filtering_section_new_data))


Out[158]:
0

With modified RANSAC code.

Use 3D RANSAC algorithm as suggested by raff with the third dimension as time


In [94]:
# (longitude, latitude) of the first 11 points as a plain 2-D array.
# .values is used instead of the deprecated as_matrix(); it returns the same array.
locArr = with_hcs_18_df[["mLongitude", "mLatitude"]].iloc[0:11].values

In [97]:
# mTime of the first 11 points as a plain 1-D array.
# .values is used instead of the deprecated as_matrix(); it returns the same array.
timeArr = with_hcs_18_df.mTime.iloc[0:11].values

In [208]:
with_hcs_18_df.index[0:11] * 1000


Out[208]:
Int64Index([0, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 9000, 10000], dtype='int64')

In [218]:
from sklearn import linear_model
# RANSAC fits on randomly drawn subsets of the points, so the seed is fixed to make the
# inlier mask reproducible when this cell is re-run.
model_ransac = linear_model.RANSACRegressor(linear_model.LinearRegression(), random_state=0)
# Fit time as a linear function of (longitude, latitude)
model_ransac.fit(locArr, timeArr)
# One boolean per input point; False marks the points RANSAC treats as outliers
model_ransac.inlier_mask_


Out[218]:
array([False,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True], dtype=bool)

In [212]:
from sklearn import linear_model
model_linear = linear_model.LinearRegression()
# RANSAC fits on randomly drawn subsets of the points, so the seed is fixed to make the
# inlier mask reproducible when this cell is re-run.
model_ransac = linear_model.RANSACRegressor(model_linear, random_state=0)
# Same fit as against time, but with the row index (0..10) as the target
model_ransac.fit(locArr, with_hcs_18_df.index[0:11])
# One boolean per input point; False marks the points RANSAC treats as outliers
model_ransac.inlier_mask_


Out[212]:
array([False,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True], dtype=bool)

In [216]:
# Ordinary least-squares fit of time against (longitude, latitude) over all 11 points,
# i.e. without RANSAC's outlier rejection; the cell displays the two fitted coefficients.
model_linear = linear_model.LinearRegression().fit(locArr, timeArr)
model_linear.coef_


Out[216]:
array([ 11603.9870189 ,   7499.59480915])

In [219]:
model_ransac.predict(locArr)


Out[219]:
array([[  1.42790196e+09],
       [  1.42790170e+09],
       [  1.42790185e+09],
       [  1.42790196e+09],
       [  1.42790199e+09],
       [  1.42790196e+09],
       [  1.42790196e+09],
       [  1.42790205e+09],
       [  1.42790209e+09],
       [  1.42790214e+09],
       [  1.42790223e+09]])

In [220]:
import datetime as pydt

In [223]:
# Convert the predicted timestamps to datetimes for readability. predict() returns one
# row per point here, so the result is flattened to hand fromtimestamp() plain scalars
# instead of 1-element arrays. The loop variable is named so that it does not shadow `time`.
[pydt.datetime.fromtimestamp(predicted_ts) for predicted_ts in model_ransac.predict(locArr).ravel()]


Out[223]:
[datetime.datetime(2015, 4, 1, 8, 25, 58, 469351),
 datetime.datetime(2015, 4, 1, 8, 21, 39, 826353),
 datetime.datetime(2015, 4, 1, 8, 24, 8, 299256),
 datetime.datetime(2015, 4, 1, 8, 25, 58, 460866),
 datetime.datetime(2015, 4, 1, 8, 26, 34, 318596),
 datetime.datetime(2015, 4, 1, 8, 25, 58, 485555),
 datetime.datetime(2015, 4, 1, 8, 25, 58, 467727),
 datetime.datetime(2015, 4, 1, 8, 27, 32, 97266),
 datetime.datetime(2015, 4, 1, 8, 28, 7, 41197),
 datetime.datetime(2015, 4, 1, 8, 29, 0, 415836),
 datetime.datetime(2015, 4, 1, 8, 30, 29, 587349)]

In [226]:
from sklearn import linear_model
# RANSAC fits on randomly drawn subsets of the points, so the seed is fixed to make the
# inlier mask reproducible when this cell is re-run.
model_ransac = linear_model.RANSACRegressor(linear_model.LinearRegression(), random_state=0)
# Target is the row index scaled by 1000 (0, 1000, ..., 10000)
model_ransac.fit(locArr, with_hcs_18_df.index[0:11] * 1000)
# One boolean per input point; False marks the points RANSAC treats as outliers
model_ransac.inlier_mask_


Out[226]:
array([False,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True], dtype=bool)

In [228]:
# Show the predictions as a list with one entry per input point
list(model_ransac.predict(locArr))


Out[228]:
[array([ 4600.44836058]),
 array([ 107.49892901]),
 array([ 2881.5411263]),
 array([ 4600.29105106]),
 array([ 5612.3989829]),
 array([ 4600.74741301]),
 array([ 4600.41849769]),
 array([ 6723.0932631]),
 array([ 7396.37495576]),
 array([ 8398.32204263]),
 array([ 10079.31373855])]

In [230]:
# Alternative arrangement of the same 11 points: (time, latitude) as the features and
# longitude as the target.
# .values is used instead of the deprecated as_matrix(); it returns the same arrays.
timeLatArr = with_hcs_18_df[["mTime", "mLatitude"]].iloc[0:11].values
lonArr = with_hcs_18_df.mLongitude.iloc[0:11].values

In [231]:
# RANSAC fits on randomly drawn subsets of the points, so the seed is fixed to make the
# inlier mask reproducible when this cell is re-run.
model_ransac = linear_model.RANSACRegressor(linear_model.LinearRegression(), random_state=0)
# Fit longitude as a linear function of (time, latitude)
model_ransac.fit(timeLatArr, lonArr)
# One boolean per input point; False marks the points RANSAC treats as outliers
model_ransac.inlier_mask_


Out[231]:
array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True], dtype=bool)

In [225]:
from sklearn import linear_model
# RANSAC fits on randomly drawn subsets of the points, so the seed is fixed to make the
# inlier mask reproducible when this cell is re-run.
model_ransac = linear_model.RANSACRegressor(linear_model.LinearRegression(), random_state=0)
# Features are (longitude, latitude, time); target is the row index (0..10)
model_ransac.fit(locAndTimeArr, with_hcs_18_df.index[0:11])
# One boolean per input point; False marks the points RANSAC treats as outliers
model_ransac.inlier_mask_


Out[225]:
array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True], dtype=bool)

In [111]:
from sklearn import linear_model
# Same experiment with a Lasso base estimator instead of plain linear regression.
# RANSAC fits on randomly drawn subsets of the points, so the seed is fixed to make the
# inlier mask reproducible when this cell is re-run.
model_ransac = linear_model.RANSACRegressor(linear_model.Lasso(), random_state=0)
model_ransac.fit(locArr, timeArr)
# One boolean per input point; False marks the points RANSAC treats as outliers
model_ransac.inlier_mask_


Out[111]:
array([ True,  True,  True,  True,  True,  True, False,  True,  True,
        True,  True], dtype=bool)

In [113]:
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import Pipeline

# Degree-3 polynomial regression as the RANSAC base estimator. The intercept is disabled
# on the linear step because PolynomialFeatures already emits a constant (bias) column.
model_pipeline = Pipeline([('poly', PolynomialFeatures(degree=3)),
                  ('linear', linear_model.LinearRegression(fit_intercept=False))])
# RANSAC fits on randomly drawn subsets of the points, so the seed is fixed to make the
# inlier mask reproducible when this cell is re-run.
model_ransac = linear_model.RANSACRegressor(model_pipeline, random_state=0)
model_ransac.fit(locArr, timeArr)
# One boolean per input point; False marks the points RANSAC treats as outliers
model_ransac.inlier_mask_


Out[113]:
array([False,  True,  True,  True,  True,  True,  True, False, False,
       False, False], dtype=bool)

In [115]:
# Positions of the points that the most recently fitted RANSAC model rejected as outliers
np.nonzero(~model_ransac.inlier_mask_)


Out[115]:
(array([ 0,  7,  8,  9, 10]),)

In [108]:
# (longitude, latitude, time) of the first 11 points as a plain 2-D array.
# .values is used instead of the deprecated as_matrix(); it returns the same array.
locAndTimeArr = with_hcs_18_df[["mLongitude", "mLatitude", "mTime"]].iloc[0:11].values

In [102]:
from sklearn import svm
# One-class SVM with default parameters, fit on the (longitude, latitude, time) rows.
# NOTE(review): the decision function evaluated in the next cell is 0 for every point, so
# this model does not separate anything; presumably the unscaled mTime column dominates
# the RBF kernel - confirm (e.g. by scaling the features) before relying on it.
model_one_class_svm = svm.OneClassSVM()
model_one_class_svm.fit(locAndTimeArr)


Out[102]:
OneClassSVM(cache_size=200, coef0=0.0, degree=3, gamma=0.0, kernel='rbf',
      max_iter=-1, nu=0.5, random_state=None, shrinking=True, tol=0.001,
      verbose=False)

In [103]:
model_one_class_svm.decision_function(locAndTimeArr)


Out[103]:
array([[ 0.],
       [ 0.],
       [ 0.],
       [ 0.],
       [ 0.],
       [ 0.],
       [ 0.],
       [ 0.],
       [ 0.],
       [ 0.],
       [ 0.]])

In [99]:
import numpy as np

In [100]:
# Positions of the points that the most recently fitted RANSAC model rejected as outliers
np.nonzero(~model_ransac.inlier_mask_)


Out[100]:
(array([0]),)

Section 12: The Palo Alto starter?


In [117]:
section_12_df = ls.get_section_points(section_list[12])


final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1428591536.0}}, {'data.mTime': {'$lt': 1428593494.0}}], 'user_id': u'20150409T065856-0700_0', 'metadata.filter': u'time'} 

In [118]:
with_speeds_section_12_df = ls.add_speed(ls.filter_accuracy(section_12_df))


filtering points Int64Index([1, 2, 3, 6, 7, 8, 9, 10, 11, 15, 22, 25, 30, 31, 32, 33, 34, 36, 39, 41, 46, 47], dtype='int64')
filtered list size went from (57, 8) to (35, 8)

In [120]:
section_12_iqr_threshold = cso.BoxplotOutlier(ignore_zeros=True).get_threshold(with_speeds_section_12_df); section_12_iqr_threshold


DEBUG:root:quartile values are 0.25     2519.376592
0.75    29717.948299
Name: speed, dtype: float64
DEBUG:root:iqr 27198.5717071
Out[120]:
111313.66341979345

In [121]:
ipy.inline_map(lo.get_map(with_speeds_section_12_df))


Out[121]:

In [123]:
section_list[12].prev_section


Out[123]:
u'20150409T064324-0700_0'

In [126]:
with_speeds_section_12_df[0:10]


Out[126]:
filter formatted_time idx key mAccuracy mLatitude mLongitude mTime distance speed
0 time 1970-01-17 04:49:51.568000 1 background/location 40.500000 37.443630 -122.165035 1428591568 0.000000 0.000000
1 time 1970-01-17 04:49:51.694000 5 background/location 27.000000 37.403830 -122.100484 1428591694 7216.553488 57274.234028
2 time 1970-01-17 04:49:51.717000 6 background/location 27.000000 37.403830 -122.100484 1428591717 0.000000 0.000000
3 time 1970-01-17 04:49:51.957000 13 background/location 20.000000 37.443489 -122.163767 1428591957 7118.754513 29661.477137
4 time 1970-01-17 04:49:51.963000 14 background/location 20.000000 37.443489 -122.163767 1428591963 0.000000 0.000000
5 time 1970-01-17 04:49:51.992000 15 background/location 73.737999 37.443489 -122.163767 1428591992 0.000000 0.000000
6 time 1970-01-17 04:49:52.040000 17 background/location 10.000000 37.443938 -122.165095 1428592040 127.391901 2653.997930
7 time 1970-01-17 04:49:52.048000 18 background/location 10.000000 37.443938 -122.165095 1428592048 0.000000 0.000000
8 time 1970-01-17 04:49:52.080000 19 background/location 68.084000 37.443384 -122.164812 1428592080 66.487090 2077.721565
9 time 1970-01-17 04:49:52.110000 20 background/location 10.000000 37.443938 -122.165431 1428592110 82.346776 2744.892534

In [124]:
ipy.inline_map(lo.get_map(ls.get_section_points(lq.get_section(section_list[12].prev_section))))


final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1428590604.0}}, {'data.mTime': {'$lt': 1428591507.0}}], 'user_id': u'20150409T064324-0700_0', 'metadata.filter': u'time'} 
Out[124]:

In [159]:
np.count_nonzero(with_speeds_section_12_df.speed > section_12_iqr_threshold)


Out[159]:
0

In [162]:
# The default BoxplotOutlier threshold flagged no points for section 12, so repeat the
# count with the MINOR multiplier to see whether that threshold catches any.
np.count_nonzero(with_speeds_section_12_df.speed > 
                 cso.BoxplotOutlier(multiplier=cso.BoxplotOutlier.MINOR, ignore_zeros=True).get_threshold(with_speeds_section_12_df))


DEBUG:root:quartile values are 0.25     2519.376592
0.75    29717.948299
Name: speed, dtype: float64
DEBUG:root:iqr 27198.5717071
Out[162]:
0

In [163]:
cso.BoxplotOutlier(multiplier=cso.BoxplotOutlier.MINOR, ignore_zeros=True).get_threshold(with_speeds_section_12_df)


DEBUG:root:quartile values are 0.25     2519.376592
0.75    29717.948299
Name: speed, dtype: float64
DEBUG:root:iqr 27198.5717071
Out[163]:
70515.805859206695

In [165]:
with_speeds_section_12_df.speed.hist(bins=50, figsize=(20,6))


Out[165]:
<matplotlib.axes.AxesSubplot at 0x10dfbbad0>

In [167]:
np.count_nonzero(with_speeds_section_12_df.speed == 0)


Out[167]:
7

In [172]:
np.count_nonzero(with_speeds_section_12_df.speed < 1000)


Out[172]:
10

In [182]:
# First and third quartile of the speeds, with zero speeds included (speed >= 0).
# The speed column is selected before calling quantile(), so the quantiles are not computed
# over every column of the frame (which also holds non-numeric columns such as `key`).
quartile_vals = with_speeds_section_12_df[with_speeds_section_12_df.speed >= 0].speed.quantile([0.25, 0.75])

In [183]:
# Inter-quartile range: third quartile minus first quartile
iqr = quartile_vals.iloc[1] - quartile_vals.iloc[0]
# Upper fences at Q3 + 1.5 * IQR and Q3 + 3 * IQR, displayed as a pair
quartile_vals.iloc[1] + 1.5 * iqr, quartile_vals.iloc[1] + 3 * iqr


Out[183]:
(66763.767673164402, 106467.18797478775)

Final iterative algorithm

Four main steps:

  • Find segments. Find points where the incoming speed is above the threshold (jumps) and use them as the boundaries between segments.
    • Note that this may miss/conflate some segments because the jumps between them are below the threshold. These missed jumps may be at any point of any segment (not necessarily in the first or last). We handle these in the next stage.
  • Identify a "known good" segment.
    • We can't just use the longest segment or the segment with the most points because they may have jumps that are below the threshold. We will try two approaches.
      • shortest "real" (non-cluster) segment
      • the segment with the most points that does not have the max speed in it
  • Work outward from the "known good" segment in both directions, toggling the type of expected segment ("good", "bad") at each step
    • "good" segments can be any length, but "bad" segments are expected to fall within cluster threshold
      • additional check on "good" segments to see whether they are within the major threshold. May be possible if we have enough points
    • if a "bad" segment does not fall within the cluster threshold, it is "mixed". Re-segment it as the first (or last) "cluster" and everything else, and continue
  • verify that all speeds after filtering are within the new major threshold range

Section 18


In [579]:
reload(cjs)


Out[579]:
<module 'emission.analysis.classification.cleaning.jump_smoothing' from '/Users/shankari/e-mission/e-mission-server/emission/analysis/classification/cleaning/jump_smoothing.py'>

In [552]:
train_only_zigzag_result = ls.filter_points(section_train_only_df, cso.BoxplotOutlier(ignore_zeros=True), cjs.SmoothZigzag())


DEBUG:root:quartile values are 0.25    10078.712130
0.75    93479.100745
Name: speed, dtype: float64
DEBUG:root:iqr 83400.3886148
DEBUG:root:For cluster 0 - 1, distance = 0.0, is_cluster = True
DEBUG:root:For cluster 1 - 4, distance = 23266.3577442, is_cluster = False
DEBUG:root:For cluster 4 - 5, distance = 0.0, is_cluster = True
DEBUG:root:For cluster 5 - 7, distance = 0.181351790572, is_cluster = True
DEBUG:root:For cluster 7 - 18, distance = 10697.0211474, is_cluster = False
DEBUG:root:For cluster 18 - 20, distance = 0.0, is_cluster = True
DEBUG:root:For cluster 20 - 23, distance = 77.0287528643, is_cluster = True
DEBUG:root:For cluster 23 - 29, distance = 0.0478251850624, is_cluster = True
DEBUG:root:For cluster 29 - 38, distance = 8402.75466767, is_cluster = False
DEBUG:root:shortest_non_cluster_segment = 8
DEBUG:root:Finished marking segment states for direction IterationDirection.RIGHT 
DEBUG:root:Processing segment 7: Segment(23, 29, 0.0478251850624), expecting state Segment_State.BAD
DEBUG:root:At the end of the loop for direction IterationDirection.LEFT, i = 6
DEBUG:root:Processing segment 6: Segment(20, 23, 77.0287528643), expecting state Segment_State.GOOD
DEBUG:root:At the end of the loop for direction IterationDirection.LEFT, i = 5
DEBUG:root:Processing segment 5: Segment(18, 20, 0.0), expecting state Segment_State.BAD
DEBUG:root:At the end of the loop for direction IterationDirection.LEFT, i = 4
DEBUG:root:Processing segment 4: Segment(7, 18, 10697.0211474), expecting state Segment_State.GOOD
DEBUG:root:At the end of the loop for direction IterationDirection.LEFT, i = 3
DEBUG:root:Processing segment 3: Segment(5, 7, 0.181351790572), expecting state Segment_State.BAD
DEBUG:root:At the end of the loop for direction IterationDirection.LEFT, i = 2
DEBUG:root:Processing segment 2: Segment(4, 5, 0.0), expecting state Segment_State.GOOD
DEBUG:root:At the end of the loop for direction IterationDirection.LEFT, i = 1
DEBUG:root:Processing segment 1: Segment(1, 4, 23266.3577442), expecting state Segment_State.BAD
DEBUG:root:Recomputed_speed_df = 3         0.000000
2   -262563.895046
1    -31228.151893
Name: speed, dtype: float64
DEBUG:root:new split point = 2
DEBUG:root:For cluster 1 - 3, distance = 1748.77650602, is_cluster = False
DEBUG:root:For cluster 3 - 4, distance = 0.0, is_cluster = True
DEBUG:root:Finishing process for Segment(3, 4, 0.0) after splitting mixed cluster
DEBUG:root:At the end of the loop for direction IterationDirection.LEFT, i = 1
DEBUG:root:Processing segment 1: Segment(1, 3, 1748.77650602), expecting state Segment_State.GOOD
DEBUG:root:At the end of the loop for direction IterationDirection.LEFT, i = 0
DEBUG:root:Processing segment 0: Segment(0, 1, 0.0), expecting state Segment_State.BAD
DEBUG:root:At the end of the loop for direction IterationDirection.LEFT, i = -1
DEBUG:root:Finished marking segment states for direction IterationDirection.LEFT 
DEBUG:root:unknown_segments = []
DEBUG:root:bad_segments = [Segment(0, 1, 0.0), Segment(3, 4, 0.0), Segment(5, 7, 0.181351790572), Segment(18, 20, 0.0), Segment(23, 29, 0.0478251850624)]
DEBUG:root:after setting values, outlier_mask = [ 0  3  5  6 18 19 23 24 25 26 27 28]
DEBUG:root:quartile values are 0.25    15768.440975
0.75    33260.964194
Name: speed, dtype: float64
DEBUG:root:iqr 17492.5232196
filtering points Int64Index([2, 4, 7, 10, 14, 21, 22, 23, 26, 38, 48, 49], dtype='int64')
filtered list size went from (51, 8) to (39, 8)

In [553]:
ipy.inline_map(lo.get_map(train_only_zigzag_result))


Out[553]:

In [554]:
bike_only_zigzag_result = ls.filter_points(section_bike_only_df, cso.BoxplotOutlier(ignore_zeros=True), cjs.SmoothZigzag())


DEBUG:root:quartile values are 0.25     135.766851
0.75    2869.587640
Name: speed, dtype: float64
DEBUG:root:iqr 2733.82078886
DEBUG:root:For cluster 0 - 1, distance = 0.0, is_cluster = True
DEBUG:root:For cluster 1 - 23, distance = 854.106845725, is_cluster = False
DEBUG:root:shortest_non_cluster_segment = 1
DEBUG:root:Finished marking segment states for direction IterationDirection.RIGHT 
DEBUG:root:Processing segment 0: Segment(0, 1, 0.0), expecting state Segment_State.BAD
DEBUG:root:At the end of the loop for direction IterationDirection.LEFT, i = -1
DEBUG:root:Finished marking segment states for direction IterationDirection.LEFT 
DEBUG:root:unknown_segments = []
DEBUG:root:bad_segments = [Segment(0, 1, 0.0)]
DEBUG:root:after setting values, outlier_mask = [0]
DEBUG:root:quartile values are 0.25     130.339472
0.75    2821.004219
Name: speed, dtype: float64
DEBUG:root:iqr 2690.66474725
filtering points Int64Index([0, 2], dtype='int64')
filtered list size went from (26, 8) to (24, 8)

In [555]:
ipy.inline_map(lo.get_map(bike_only_zigzag_result))


Out[555]:

In [556]:
section_18_zigzag_result = ls.filter_points(section_18_df, cso.BoxplotOutlier(ignore_zeros=True), cjs.SmoothZigzag())


DEBUG:root:quartile values are 0.25      508.786242
0.75    34515.555837
Name: speed, dtype: float64
DEBUG:root:iqr 34006.7695946
DEBUG:root:For cluster 0 - 1, distance = 0.0, is_cluster = True
DEBUG:root:For cluster 1 - 3, distance = 1748.77650602, is_cluster = False
DEBUG:root:For cluster 3 - 4, distance = 0.0, is_cluster = True
DEBUG:root:For cluster 4 - 5, distance = 0.0, is_cluster = True
DEBUG:root:For cluster 5 - 7, distance = 0.181351790572, is_cluster = True
DEBUG:root:For cluster 7 - 18, distance = 10697.0211474, is_cluster = False
DEBUG:root:For cluster 18 - 20, distance = 0.0, is_cluster = True
DEBUG:root:For cluster 20 - 23, distance = 77.0287528643, is_cluster = True
DEBUG:root:For cluster 23 - 29, distance = 0.0478251850624, is_cluster = True
DEBUG:root:For cluster 29 - 40, distance = 9539.80014176, is_cluster = False
DEBUG:root:For cluster 40 - 41, distance = 0.0, is_cluster = True
DEBUG:root:For cluster 41 - 63, distance = 854.106845725, is_cluster = False
DEBUG:root:shortest_non_cluster_segment = 11
DEBUG:root:Finished marking segment states for direction IterationDirection.RIGHT 
DEBUG:root:Processing segment 10: Segment(40, 41, 0.0), expecting state Segment_State.BAD
DEBUG:root:At the end of the loop for direction IterationDirection.LEFT, i = 9
DEBUG:root:Processing segment 9: Segment(29, 40, 9539.80014176), expecting state Segment_State.GOOD
DEBUG:root:At the end of the loop for direction IterationDirection.LEFT, i = 8
DEBUG:root:Processing segment 8: Segment(23, 29, 0.0478251850624), expecting state Segment_State.BAD
DEBUG:root:At the end of the loop for direction IterationDirection.LEFT, i = 7
DEBUG:root:Processing segment 7: Segment(20, 23, 77.0287528643), expecting state Segment_State.GOOD
DEBUG:root:At the end of the loop for direction IterationDirection.LEFT, i = 6
DEBUG:root:Processing segment 6: Segment(18, 20, 0.0), expecting state Segment_State.BAD
DEBUG:root:At the end of the loop for direction IterationDirection.LEFT, i = 5
DEBUG:root:Processing segment 5: Segment(7, 18, 10697.0211474), expecting state Segment_State.GOOD
DEBUG:root:At the end of the loop for direction IterationDirection.LEFT, i = 4
DEBUG:root:Processing segment 4: Segment(5, 7, 0.181351790572), expecting state Segment_State.BAD
DEBUG:root:At the end of the loop for direction IterationDirection.LEFT, i = 3
DEBUG:root:Processing segment 3: Segment(4, 5, 0.0), expecting state Segment_State.GOOD
DEBUG:root:At the end of the loop for direction IterationDirection.LEFT, i = 2
DEBUG:root:Processing segment 2: Segment(3, 4, 0.0), expecting state Segment_State.BAD
DEBUG:root:At the end of the loop for direction IterationDirection.LEFT, i = 1
DEBUG:root:Processing segment 1: Segment(1, 3, 1748.77650602), expecting state Segment_State.GOOD
DEBUG:root:At the end of the loop for direction IterationDirection.LEFT, i = 0
DEBUG:root:Processing segment 0: Segment(0, 1, 0.0), expecting state Segment_State.BAD
DEBUG:root:At the end of the loop for direction IterationDirection.LEFT, i = -1
DEBUG:root:Finished marking segment states for direction IterationDirection.LEFT 
DEBUG:root:unknown_segments = []
DEBUG:root:bad_segments = [Segment(0, 1, 0.0), Segment(3, 4, 0.0), Segment(5, 7, 0.181351790572), Segment(18, 20, 0.0), Segment(23, 29, 0.0478251850624), Segment(40, 41, 0.0)]
DEBUG:root:after setting values, outlier_mask = [ 0  3  5  6 18 19 23 24 25 26 27 28 40]
DEBUG:root:quartile values are 0.25     1221.404373
0.75    30223.256567
Name: speed, dtype: float64
DEBUG:root:iqr 29001.8521937
filtering points Int64Index([2, 4, 7, 10, 14, 21, 22, 23, 26, 38, 48, 49, 52, 54], dtype='int64')
filtered list size went from (78, 8) to (64, 8)

In [557]:
ipy.inline_map(lo.get_map(section_18_zigzag_result))


Out[557]:

In [558]:
section_12_zigzag_result = ls.filter_points(section_12_df, cso.BoxplotOutlier(ignore_zeros=True), cjs.SmoothZigzag())


DEBUG:root:quartile values are 0.25     2519.376592
0.75    29717.948299
Name: speed, dtype: float64
DEBUG:root:iqr 27198.5717071
DEBUG:root:For cluster 0 - 34, distance = 26189.3973107, is_cluster = False
DEBUG:root:shortest_non_cluster_segment = 0
DEBUG:root:Finished marking segment states for direction IterationDirection.RIGHT 
DEBUG:root:Finished marking segment states for direction IterationDirection.LEFT 
DEBUG:root:unknown_segments = []
DEBUG:root:bad_segments = []
DEBUG:root:after setting values, outlier_mask = []
DEBUG:root:quartile values are 0.25     2519.376592
0.75    29717.948299
Name: speed, dtype: float64
DEBUG:root:iqr 27198.5717071
filtering points Int64Index([1, 2, 3, 6, 7, 8, 9, 10, 11, 15, 22, 25, 30, 31, 32, 33, 34, 36, 39, 41, 46, 47], dtype='int64')
filtered list size went from (57, 8) to (35, 8)

In [559]:
ipy.inline_map(lo.get_map(section_12_zigzag_result))


Out[559]:

In [560]:
section_new_data_zigzag_result = ls.filter_points(section_new_data_df, cso.BoxplotOutlier(ignore_zeros=True), cjs.SmoothZigzag())


DEBUG:root:quartile values are 0.25     7.640136
0.75    20.105116
Name: speed, dtype: float64
DEBUG:root:iqr 12.4649802436
DEBUG:root:For cluster 0 - 8, distance = 14025.4359673, is_cluster = False
DEBUG:root:For cluster 8 - 9, distance = 0.0, is_cluster = True
DEBUG:root:For cluster 9 - 10, distance = 0.0, is_cluster = True
DEBUG:root:For cluster 10 - 29, distance = 31754.9575625, is_cluster = False
DEBUG:root:shortest_non_cluster_segment = 0
DEBUG:root:Processing segment 1: Segment(8, 9, 0.0), expecting state Segment_State.BAD
DEBUG:root:At the end of the loop for direction IterationDirection.RIGHT, i = 2
DEBUG:root:Processing segment 2: Segment(9, 10, 0.0), expecting state Segment_State.GOOD
DEBUG:root:At the end of the loop for direction IterationDirection.RIGHT, i = 3
DEBUG:root:Processing segment 3: Segment(10, 29, 31754.9575625), expecting state Segment_State.BAD
DEBUG:root:For cluster 12 - 29, distance = 7576.00558761, is_cluster = False
DEBUG:root:For cluster 10 - 12, distance = 0.225164504354, is_cluster = True
DEBUG:root:Finishing process for Segment(10, 12, 0.225164504354) after splitting mixed cluster
DEBUG:root:At the end of the loop for direction IterationDirection.RIGHT, i = 4
DEBUG:root:Processing segment 4: Segment(12, 29, 7576.00558761), expecting state Segment_State.GOOD
DEBUG:root:At the end of the loop for direction IterationDirection.RIGHT, i = 5
DEBUG:root:Finished marking segment states for direction IterationDirection.RIGHT 
DEBUG:root:Finished marking segment states for direction IterationDirection.LEFT 
DEBUG:root:unknown_segments = []
DEBUG:root:bad_segments = [Segment(8, 9, 0.0), Segment(10, 12, 0.225164504354)]
DEBUG:root:after setting values, outlier_mask = [ 8 10 11]
DEBUG:root:quartile values are 0.25     7.695639
0.75    18.078171
Name: speed, dtype: float64
DEBUG:root:iqr 10.3825320677
filtering points Int64Index([2, 3, 8, 14, 16, 17, 24, 25, 36], dtype='int64')
filtered list size went from (39, 28) to (30, 28)

In [561]:
ipy.inline_map(lo.get_map(section_new_data_zigzag_result))


Out[561]:

In [565]:
# Load the points for section 17 and drop the low-accuracy ones right away.
# NOTE(review): unlike section_12_df and section_new_data_df, which are passed to
# ls.filter_points unfiltered, this frame is already accuracy-filtered when it reaches
# ls.filter_points below - confirm that the difference is intended.
section_17_df = ls.filter_accuracy(ls.get_section_points(section_list[17]))


final query = {'metadata.key': 'background/location', '$and': [{'data.mTime': {'$gt': 1425338437.0}}, {'data.mTime': {'$lt': 1425340067.0}}], 'user_id': u'20150302T152037-0800_0', 'metadata.filter': u'time'} 
filtering points Int64Index([14, 15, 18, 22, 31, 32, 33, 35, 36, 38, 39, 40, 41, 42, 43, 44, 45, 49, 50, 51], dtype='int64')
filtered list size went from (54, 8) to (34, 8)

In [567]:
with_speeds_17_df = ls.add_speed(section_17_df)
with_speeds_17_df[27:37]


Out[567]:
filter formatted_time idx key mAccuracy mLatitude mLongitude mTime distance speed
27 time 1970-01-17 03:55:39.497000 35 background/location 26.000000 37.870030 -122.268164 1425339497 25.817396 209.897532
28 time 1970-01-17 03:55:39.589000 38 background/location 29.000000 37.870002 -122.268197 1425339589 4.309879 46.846511
29 time 1970-01-17 03:55:39.875000 47 background/location 85.198997 37.845838 -122.270813 1425339875 2696.713159 9429.066990
30 time 1970-01-17 03:55:39.906000 48 background/location 85.198997 37.845838 -122.270813 1425339906 0.000000 0.000000
31 time 1970-01-17 03:55:39.931000 49 background/location 27.000000 37.834456 -122.266991 1425339931 1309.339670 52373.586799
32 time 1970-01-17 03:55:40.048000 53 background/location 70.500000 37.826092 -122.266484 1425340048 931.101359 7958.131270
33 time 1970-01-17 03:55:40.054000 54 background/location 13.000000 37.825799 -122.267922 1425340054 130.417700 21736.283363

In [568]:
ipy.inline_map(lo.get_map(with_speeds_17_df))


Out[568]:

In [581]:
section_17_zigzag_result = ls.filter_points(section_17_df, cso.BoxplotOutlier(ignore_zeros=True), cjs.SmoothZigzag())


DEBUG:root:quartile values are 0.25     294.581436
0.75    4701.511937
Name: speed, dtype: float64
DEBUG:root:iqr 4406.93050047
DEBUG:root:For cluster 0 - 31, distance = 3353.2718755, is_cluster = False
DEBUG:root:For cluster 31 - 33, distance = 931.101358626, is_cluster = False
DEBUG:root:shortest_non_cluster_segment = 1
DEBUG:root:Finished marking segment states for direction IterationDirection.RIGHT 
DEBUG:root:Processing segment 0: Segment(0, 31, 3353.2718755), expecting state Segment_State.BAD
DEBUG:root:Recomputed_speed_df = 30       0.000000
29      -0.000000
28   -9429.066990
27     -46.846511
26    -209.897532
25    -150.383645
24   -1538.435684
23    -287.799488
22    -690.516227
21    -197.005221
20   -3145.155472
19      -0.000000
18   -1080.067548
17    -253.688564
16    -128.441222
15   -2980.561180
14   -8875.288909
13    -488.810751
12   -2700.959388
11      -0.000000
10    -322.526996
9    -3571.743084
8       -0.000000
7    -5473.196124
6    -2709.924109
5     -301.363385
4       -0.000000
3    -5863.557313
2       -0.000000
1    -1662.678193
0    -3929.827749
Name: speed, dtype: float64
DEBUG:root:new split point = 28
DEBUG:root:For cluster 0 - 29, distance = 946.804185803, is_cluster = False
DEBUG:root:For cluster 29 - 31, distance = 0.0, is_cluster = True
DEBUG:root:Finishing process for Segment(29, 31, 0.0) after splitting mixed cluster
DEBUG:root:At the end of the loop for direction IterationDirection.LEFT, i = 0
DEBUG:root:Processing segment 0: Segment(0, 29, 946.804185803), expecting state Segment_State.GOOD
DEBUG:root:At the end of the loop for direction IterationDirection.LEFT, i = -1
DEBUG:root:Finished marking segment states for direction IterationDirection.LEFT 
DEBUG:root:unknown_segments = []
DEBUG:root:bad_segments = [Segment(29, 31, 0.0)]
DEBUG:root:after setting values, outlier_mask = [29 30]
DEBUG:root:quartile values are 0.25     291.190462
0.75    3840.306583
Name: speed, dtype: float64
DEBUG:root:iqr 3549.11612106
WARNING:root:After first round, still have outliers    filter             formatted_time  idx                  key  mAccuracy  \
33   time 1970-01-17 03:55:40.054000   54  background/location         13   

    mLatitude  mLongitude       mTime  distance         speed  
33  37.825799 -122.267922  1425340054  130.4177  21736.283363  
filtering points Int64Index([], dtype='int64')
filtered list size went from (34, 8) to (34, 8)

In [582]:
ipy.inline_map(lo.get_map(section_17_zigzag_result))


Out[582]:

In [ ]: